# Generic user agents — full access.
# Fallback group: used by any crawler that has no group of its own in this
# file. The empty `Disallow:` value blocks nothing, so every path is crawlable.
User-agent: *
Disallow:
# Content-Signals (contentsignals.org / draft-romm-aipref-contentsignals):
#   search=yes    — OK to use as source for search indexing
#   ai-input=yes  — OK to use as retrieval-time context for AI assistants
#   ai-train=no   — NOT authorized for model training without explicit agreement
# Load-bearing for the IsAgentReady botAccessControl.contentSignals check —
# the scanner greps for `Content-Signal:` in robots.txt. Removing this line
# drops the site from Level 5 "Agent-Native" to Level 1 "Basic Web Presence".
# Known cost: the Lighthouse SEO audit docks robots.txt to 92/100 because its
# strict parser rejects this draft directive as unknown; the trade is worth it.
# NOTE(review): this directive sits inside the `*` group only. Under RFC 9309 a
# crawler that matches a named group obeys that group instead of `*`, so the
# named crawler groups in this file presumably do not carry these signals —
# confirm that is intended.
Content-Signal: search=yes, ai-input=yes, ai-train=no

# --- Explicit AI-crawler directives ---
# Each crawler named below gets its own group granting full crawl access, for
# both training and retrieval-time fetches. DexPaprika content is public data.
# To block a specific crawler, change the rule in its group to `Disallow: /`.
# NOTE(review): none of these groups contains a Content-Signal line. Under
# RFC 9309 a crawler obeys the group that names it rather than the `*` group,
# so the `ai-train=no` signal declared in the `*` group presumably does not
# reach these crawlers. That is consistent with allowing training here, but
# confirm it is the intended policy.

User-agent: GPTBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

User-agent: ClaudeBot
Allow: /

User-agent: Claude-Web
Allow: /

User-agent: anthropic-ai
Allow: /

User-agent: Google-Extended
Allow: /

User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: CCBot
Allow: /

User-agent: PerplexityBot
Allow: /

User-agent: Perplexity-User
Allow: /

User-agent: YouBot
Allow: /

User-agent: Applebot
Allow: /

User-agent: Applebot-Extended
Allow: /

User-agent: Amazonbot
Allow: /

User-agent: cohere-ai
Allow: /

User-agent: DuckAssistBot
Allow: /

User-agent: FacebookBot
Allow: /

User-agent: Meta-ExternalAgent
Allow: /

User-agent: Bytespider
Allow: /

User-agent: Diffbot
Allow: /

# --- Sitemaps ---
# Sitemap lines are not tied to any User-agent group; they apply file-wide.
# NOTE(review): the second sitemap is served from the static.dexpaprika.com
# host rather than dexpaprika.com — confirm crawlers are expected to accept it
# for the URLs it lists.
Sitemap: https://dexpaprika.com/sitemap.xml
Sitemap: https://static.dexpaprika.com/dexpaprika-static/sitemaps/sitemap.xml