# ============================================================
# robots.txt for apexanalytix.com
# ============================================================

# ============================================================
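# GENERAL RULES (default group: used by any crawler that has no
# group of its own further down; named groups replace these rules)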
# ============================================================
User-agent: *
# The admin-ajax.php exception is listed BEFORE the /wp-admin/ block and the
# group contains no blank lines. Longest-match parsers are unaffected by the
# order, but first-match parsers need the Allow first, and parsers that end a
# group at a blank line would otherwise drop a trailing Allow entirely.
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
# Filter / tracking query strings. Each pattern is a prefix match on the
# path + query, so it covers the site root with that parameter first in the
# query string (e.g. /?utm_source=newsletter).
# NOTE(review): if these parameters should also be blocked on inner pages,
# wildcard patterns such as /*?utm_source= would be needed - confirm intent.
Disallow: /?type=
Disallow: /?topic=
Disallow: /?product=
Disallow: /?blaid=
Disallow: /?data=
Disallow: /?elq=
Disallow: /?from=
Disallow: /?hsa_acc=
# No trailing "=": as a prefix this also matches /?ref=..., /?referrer=..., etc.
Disallow: /?ref
Disallow: /?source=post_page
Disallow: /?tpcc=
Disallow: /?trk=
Disallow: /?utm_campaign=
Disallow: /?utm_source=
Disallow: /?wchannelid=
Disallow: /?wmediaid=
Disallow: /?wtime=

# ============================================================
# LLM & AI CRAWLERS — Explicitly Allowed
# ============================================================

# A crawler that finds a group naming its own token obeys ONLY that group and
# ignores the "User-agent: *" group. The tokens below therefore share a single
# group that repeats the general exclusions; a bare "Allow: /" per bot would
# silently lift the /wp-admin/ and tracking-parameter rules for these crawlers.
# No blank lines appear inside the group because some parsers treat a blank
# line as the end of a group.
# Keep the rule list in sync with the "User-agent: *" group.
#
# --- OpenAI (ChatGPT) ---
# GPTBot: trains OpenAI's models (GPT-4o, etc.)
User-agent: GPTBot
# OAI-SearchBot: powers ChatGPT Search results (not training)
User-agent: OAI-SearchBot
# ChatGPT-User: user-driven browsing within ChatGPT
User-agent: ChatGPT-User
# --- Anthropic (Claude) ---
# ClaudeBot: primary Anthropic training/indexing crawler
User-agent: ClaudeBot
# anthropic-ai: alternate Anthropic crawler identifier
User-agent: anthropic-ai
# Claude-Web: legacy Anthropic crawler identifier
User-agent: Claude-Web
# --- Google AI ---
# Google-Extended: control token for use of content by Gemini / Vertex AI.
# It is not a separate crawler; fetching is done by Google's regular crawlers.
User-agent: Google-Extended
# --- Perplexity AI ---
# PerplexityBot: crawls for Perplexity's AI search results
User-agent: PerplexityBot
# --- Apple ---
# Applebot-Extended: control token for use of Applebot-crawled content in
# Apple's AI/ML training; it does not fetch pages itself.
User-agent: Applebot-Extended
# --- Meta (Facebook / Instagram) ---
# Meta-ExternalAgent: Meta's AI crawler (Llama models)
User-agent: Meta-ExternalAgent
# --- Amazon ---
# Amazonbot: Amazon's AI/Alexa training crawler
User-agent: Amazonbot
# --- Common Crawl ---
# CCBot: open dataset used to train many LLMs (GPT-3, Llama, etc.)
User-agent: CCBot
# --- Cohere ---
# cohere-ai: Cohere's AI training crawler
User-agent: cohere-ai
# --- xAI (Grok) ---
# Grok: xAI's crawler for Grok AI assistant
# NOTE(review): confirm this is the token xAI's crawler actually matches on.
User-agent: Grok
# --- DuckDuckGo ---
# DuckAssistBot: DuckDuckGo's AI assistant crawler
User-agent: DuckAssistBot
# --- Quora (Poe) ---
# QuoraBot: Quora's AI crawler (Poe platform)
User-agent: QuoraBot
# --- Bytedance ---
# Bytespider: ByteDance AI crawler
User-agent: Bytespider
# Rules for every token listed above. Specific rules come first and the
# catch-all Allow comes last so first-match parsers reach them in order.
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
Disallow: /?type=
Disallow: /?topic=
Disallow: /?product=
Disallow: /?blaid=
Disallow: /?data=
Disallow: /?elq=
Disallow: /?from=
Disallow: /?hsa_acc=
Disallow: /?ref
Disallow: /?source=post_page
Disallow: /?tpcc=
Disallow: /?trk=
Disallow: /?utm_campaign=
Disallow: /?utm_source=
Disallow: /?wchannelid=
Disallow: /?wmediaid=
Disallow: /?wtime=
# Everything else is explicitly open to these crawlers.
Allow: /

# ============================================================
# LLM CONTENT INDEX
# ============================================================
# Summary: https://www.apexanalytix.com/llms.txt
# Full index: https://www.apexanalytix.com/llms-full.txt

# ============================================================
# SITEMAP
# ============================================================
# Sitemap is a file-level directive: it does not belong to any User-agent
# group and is available to every crawler that reads this file.
Sitemap: https://www.apexanalytix.com/sitemap_index.xml