# robots.txt for www.letuska.cz
#
# Parsing notes (Robots Exclusion Protocol, RFC 9309):
#   * One directive per line; everything after `#` on a line is a comment.
#   * A crawler obeys ONLY the most specific group whose User-agent matches it.
#     Groups are not cumulative: a crawler that has its own named group ignores
#     the `User-agent: *` group entirely. Any path that must be blocked for a
#     named crawler therefore has to be listed inside that crawler's group.
#   * Do not put blank lines inside a group; some parsers treat a blank line
#     as the end of the group.

# ---------- Default policy (crawlers without a named group) --------------------
User-agent: *
Allow: /
# Block technical paths that never contain public content.
Disallow: /pp/
Disallow: /s2custom/
Disallow: /sites/all/misc/pp/
Disallow: /sites/all/misc/s2custom/
Disallow: /searchform
Disallow: /assets/
Disallow: /api/
# Keep the MCP server off general crawlers (it is not content — it is an API).
# Agents should reach it explicitly via the MCP endpoint advertised in llms.txt.
Disallow: /mcp

# ---------- AI training crawlers ----------------------------------------------
# Policy: allow well-identified AI crawlers that respect this file; users
# benefit from Letuška content appearing in AI-assisted travel search.
#
# The crawlers below share ONE group (consecutive User-agent lines followed by
# a single rule set). Because a named group replaces the `User-agent: *` group
# rather than inheriting from it, the blocked technical paths and the /mcp
# block are repeated here. Keep this Disallow list in sync with the default
# policy above.
#
# Crawlers we explicitly allow:
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Applebot-Extended
User-agent: CCBot
User-agent: Amazonbot
User-agent: Bytespider
User-agent: DuckAssistBot
User-agent: YouBot
User-agent: Meta-ExternalAgent
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: FacebookBot
User-agent: MistralAI-User
Allow: /
Disallow: /pp/
Disallow: /s2custom/
Disallow: /sites/all/misc/pp/
Disallow: /sites/all/misc/s2custom/
Disallow: /searchform
Disallow: /assets/
Disallow: /api/
Disallow: /mcp

# ---------- Discovery documents -----------------------------------------------
Sitemap: https://www.letuska.cz/sitemap.xml
# llms.txt — AI navigation guide: https://www.letuska.cz/llms.txt