User-agent: *
# Default group: applies to every crawler that has no dedicated group below.
# Block technical paths that never contain public content.
# The Disallow rules are listed BEFORE the catch-all Allow so that parsers
# which apply the first matching rule (instead of the longest match defined
# by RFC 9309) still honor them.
Disallow: /pp/
Disallow: /s2custom/
Disallow: /sites/all/misc/pp/
Disallow: /sites/all/misc/s2custom/
Disallow: /searchform
Disallow: /assets/
Disallow: /api/
# The MCP server is an API, not content. Repeated here from the later
# `User-agent: *` group for parsers that read only the first such group.
Disallow: /mcp
# Everything else is crawlable.
Allow: /

# ---------- AI training crawlers ----------------------------------------------
# Policy: allow well-identified AI crawlers that respect this file; users
# benefit from Letuška content appearing in AI-assisted travel search.
#
# IMPORTANT: a crawler obeys only the most specific group that matches its
# name; it does NOT inherit rules from `User-agent: *` (RFC 9309, section
# 2.2.1). The technical paths are therefore repeated in this group. Keep the
# Disallow list in sync with the `User-agent: *` rules in this file.
#
# All listed crawlers share one group (several User-agent lines followed by
# one rule set). Do not insert blank lines inside the group.
#
# Crawlers we explicitly allow:
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Applebot-Extended
User-agent: CCBot
User-agent: Amazonbot
User-agent: Bytespider
User-agent: DuckAssistBot
User-agent: YouBot
User-agent: Meta-ExternalAgent
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: FacebookBot
User-agent: MistralAI-User
# Technical paths that never contain public content, plus the MCP endpoint
# (an API, not content). Listed before the catch-all Allow so that parsers
# applying the first matching rule still honor them.
Disallow: /pp/
Disallow: /s2custom/
Disallow: /sites/all/misc/pp/
Disallow: /sites/all/misc/s2custom/
Disallow: /searchform
Disallow: /assets/
Disallow: /api/
Disallow: /mcp
Allow: /

# ---------- AI-specific blocks -------------------------------------------------
# Keep MCP server off general crawlers (it is not content — it is an API).
# Agents should reach it explicitly via the MCP endpoint advertised in llms.txt.
# NOTE: this group binds only crawlers that have no dedicated group of their
# own; a crawler named in its own `User-agent:` group does not read
# `User-agent: *` rules (RFC 9309, section 2.2.1). Compliant parsers merge
# this group with the other `User-agent: *` group in this file.
User-agent: *
Disallow: /mcp

# ---------- Discovery documents -----------------------------------------------
# Sitemap is a file-wide record; it is not tied to any User-agent group.
Sitemap: https://www.letuska.cz/sitemap.xml
# The line below is informational only (a comment, not a directive).
# llms.txt — AI navigation guide: https://www.letuska.cz/llms.txt