# robots.txt — one record per line. The Robots Exclusion Protocol (RFC 9309)
# is line-oriented: a `#` comments out everything up to the end of the line,
# so each directive below MUST stay on its own line to take effect.

# Generic user agents — full access
# An empty `Disallow:` value disallows nothing, i.e. everything may be crawled.
User-agent: *
Disallow:
# Content-Signals (contentsignals.org / draft-romm-aipref-contentsignals):
#   search=yes   — OK to use as source for search indexing
#   ai-input=yes — OK to use as retrieval-time context for AI assistants
#   ai-train=no  — NOT authorized for model training without explicit agreement
# Load-bearing for IsAgentReady botAccessControl.contentSignals check —
# the scanner greps for `Content-Signal:` in robots.txt. Removing this drops
# the site from Level 5 "Agent-Native" to Level 1 "Basic Web Presence".
# Lighthouse SEO audit docks robots.txt to 92/100 because its strict parser
# rejects this draft directive as unknown; the trade is worth it.
# Deliberately kept inside the `User-agent: *` group with no blank line before
# it, so parsers that end a group at a blank line still associate it with `*`.
Content-Signal: search=yes, ai-input=yes, ai-train=no

# --- Explicit AI-crawler directives ---
# `Allow: /` grants crawl access to both training and retrieval-time fetchers.
# DexPaprika content is public data. Crawl access is separate from the usage
# preference expressed by Content-Signal above (ai-train=no).
# Disable a specific crawler by changing its block to `Disallow: /`.
# NOTE(review): under RFC 9309 a crawler that matches one of the named groups
# below ignores the `*` group, so the Content-Signal line above may not apply
# to these crawlers — confirm whether it should be repeated in each group.

User-agent: GPTBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: OAI-SearchBot
Allow: /

User-agent: ClaudeBot
Allow: /

User-agent: Claude-Web
Allow: /

User-agent: anthropic-ai
Allow: /

User-agent: Google-Extended
Allow: /

User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: CCBot
Allow: /

User-agent: PerplexityBot
Allow: /

User-agent: Perplexity-User
Allow: /

User-agent: YouBot
Allow: /

User-agent: Applebot
Allow: /

User-agent: Applebot-Extended
Allow: /

User-agent: Amazonbot
Allow: /

User-agent: cohere-ai
Allow: /

User-agent: DuckAssistBot
Allow: /

User-agent: FacebookBot
Allow: /

User-agent: Meta-ExternalAgent
Allow: /

User-agent: Bytespider
Allow: /

User-agent: Diffbot
Allow: /

# --- Sitemaps ---
# Sitemap records are independent of user-agent groups and apply globally.
Sitemap: https://dexpaprika.com/sitemap.xml
Sitemap: https://static.dexpaprika.com/dexpaprika-static/sitemaps/sitemap.xml