# XML sitemap location advertised to crawlers.
Sitemap: https://www.intechopen.com/sitemap-v2/sitemap.xml

# =============================================================
# IntechOpen — robots.txt
# Last updated: March 2026
# =============================================================

# ===== DEFAULT: Allow all standard crawlers =====
# Fallback group: it applies only to crawlers that do not match a more
# specific User-agent group in this file. A crawler that has its own
# group ignores these rules entirely.
#
# The specific Disallow is listed before the catch-all Allow so that
# parsers which evaluate rules in file order (instead of by longest
# matching path) still keep /_preview out of the crawl.
# Note: "/_preview" is a prefix match and covers every path starting with it.
User-agent: *
Disallow: /_preview
Allow: /

# =============================================================
# AI SEARCH CRAWLERS — Allow
# These bots surface content in AI-powered search results
# and cite sources with backlinks. Allowing them increases
# discoverability for researchers using AI search tools.
# =============================================================

# OpenAI — ChatGPT Search (retrieval only, does NOT train models)
# A crawler with its own group does not inherit the "User-agent: *" rules,
# so the /_preview exclusion has to be repeated here.
User-agent: OAI-SearchBot
Disallow: /_preview
Allow: /

# Perplexity AI Search (always cites sources with links)
# A crawler with its own group does not inherit the "User-agent: *" rules,
# so the /_preview exclusion has to be repeated here.
User-agent: PerplexityBot
Disallow: /_preview
Allow: /

# Google — Google-Extended (Gemini training / grounding control token)
# Google-Extended is not a separate crawler and does not affect Google
# Search or AI Overviews; those follow the Googlebot rules. It only
# controls whether content fetched by Google may be used for Gemini
# model training and grounding.
# NOTE(review): this file's stated policy is to block training use —
# confirm that allowing Google-Extended is intentional.
# The /_preview exclusion is repeated because a dedicated group does not
# inherit the "User-agent: *" rules.
User-agent: Google-Extended
Disallow: /_preview
Allow: /

# Apple — Applebot (search crawler behind Siri, Spotlight and Safari results)
# Training use of Applebot-crawled pages is controlled separately through
# the Applebot-Extended token in the training section.
# Dedicated groups do not inherit the "User-agent: *" rules, so the
# /_preview exclusion is repeated in every allowed group.
User-agent: Applebot
Disallow: /_preview
Allow: /

# Amazon — Alexa AI / Rufus
User-agent: Amazonbot
Disallow: /_preview
Allow: /

# You.com AI Search
User-agent: YouBot
Disallow: /_preview
Allow: /

# =============================================================
# AI TRAINING SCRAPERS — Block
# These bots collect content to train AI models.
# =============================================================

# OpenAI — GPTBot (training crawler; ChatGPT Search uses the separate OAI-SearchBot agent)
User-agent: GPTBot
Disallow: /

# Common Crawl (used to train many open-source LLMs)
User-agent: CCBot
Disallow: /

# Anthropic — ClaudeBot (training crawler)
User-agent: ClaudeBot
Disallow: /

# ByteDance / TikTok
User-agent: Bytespider
Disallow: /

# Diffbot
User-agent: Diffbot
Disallow: /

# Meta / Facebook (training) — one group shared by both agent tokens
User-agent: FacebookBot
User-agent: Meta-ExternalAgent
Disallow: /

# Omgili / Webz.io — both tokens are listed because parsers that match
# the product token exactly do not treat "omgili" as covering "omgilibot"
User-agent: omgili
User-agent: omgilibot
Disallow: /

# Apple — Applebot-Extended (training opt-out token)
# Applebot-Extended does not crawl pages itself; disallowing it tells
# Apple not to use Applebot-crawled content for foundation-model
# training. Pages stay eligible for Apple search features via Applebot.
User-agent: Applebot-Extended
Disallow: /