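# ==========================================================
# AI CRAWLER ACCESS RULES
# Optimized for llms.txt and FAQ ingestion
#
# NOTE: the groups below contain only Allow rules and no
# Disallow rules. Under the Robots Exclusion Protocol, any
# path not matched by a rule is allowed by default, so these
# groups highlight the listed paths but do not keep crawlers
# away from the rest of the site.
# ==========================================================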

# OpenAI (ChatGPT / GPT-4) - rules for the GPTBot user agent.
# Explicitly permits the two llms.txt files and everything under /faqs/.
User-agent: GPTBot
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /faqs/

# OpenAI User (direct user-prompted browsing) - rules for the ChatGPT-User agent.
# Explicitly permits the two llms.txt files and everything under /faqs/.
User-agent: ChatGPT-User
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /faqs/

# Anthropic (Claude) - rules for the ClaudeBot user agent.
# Explicitly permits the two llms.txt files and everything under /faqs/.
User-agent: ClaudeBot
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /faqs/

# Google Gemini (training data opt-in) - rules for the Google-Extended token.
# Explicitly permits the two llms.txt files and everything under /faqs/.
User-agent: Google-Extended
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /faqs/

# Meta (Llama / Meta AI)
# Meta's crawlers identify themselves with hyphenated product tokens:
#   meta-externalagent   - crawler that collects content for AI use
#   meta-externalfetcher - fetches pages on behalf of a user request
# An unhyphenated spelling such as "MetaExternalFetcher" does not match
# either token, so the group would never be selected. Both tokens share
# this one rule group, which explicitly permits the two llms.txt files
# and everything under /faqs/.
User-agent: meta-externalagent
User-agent: meta-externalfetcher
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /faqs/

# Apple (Apple Intelligence)
# The two consecutive User-agent lines form a single group, so Applebot and
# Applebot-Extended both receive the same three Allow rules below.
User-agent: Applebot
User-agent: Applebot-Extended
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /faqs/

# Perplexity AI - rules for the PerplexityBot user agent.
# Explicitly permits the two llms.txt files and everything under /faqs/.
User-agent: PerplexityBot
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /faqs/

# Common Crawl (used by many open-source models) - rules for the CCBot user agent.
# Explicitly permits the two llms.txt files and everything under /faqs/.
User-agent: CCBot
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /faqs/

# Catch-all group. "*" is not limited to AI crawlers: it applies to every
# crawler (including ordinary search-engine bots) that does not match a more
# specific User-agent group elsewhere in this file.
# Explicitly permits the two llms.txt files and everything under /faqs/.
User-agent: *
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /faqs/

# Link to the sitemap index to help bots find the FAQ archive.
# The Sitemap line is independent of the User-agent groups above.
# NOTE(review): "yourdomain.com" looks like an unreplaced placeholder -
# confirm and substitute the site's real host before deploying.
Sitemap: https://yourdomain.com/sitemap_index.xml