# ==============================================================
# SECTION 1: BLOCKED CRAWLERS
# Training-only bots with no user-facing AI product or
# citation value. Disallow: / blocks all crawling.
# ==============================================================

# Common Crawl — no user-facing product. Upstream training
# data source for many LLMs (Llama, Mistral, others).
# No search surface. No citation or visibility value.
# The single "Disallow: /" rule excludes CCBot from every
# path on the site.
User-agent: CCBot
Disallow: /

# ByteDance / TikTok — opaque purpose, suspected training
# data collection. No verified user-facing AI search product.
# No published IP ranges — UA match is the only control available.
# The single "Disallow: /" rule excludes Bytespider from every
# path on the site.
User-agent: Bytespider
Disallow: /


# ==============================================================
# SECTION 2: GOOGLE
#
# Googlebot covers Search, Discover, AI Overviews, and AI
# Mode. Google does not deploy a separate crawler for AI
# features — Googlebot handles all surfaces.
#
# Google-Extended is a robots.txt directive only — it is NOT
# a crawling UA and will never appear in access logs.
# It controls whether Googlebot-crawled content is used by
# Gemini Apps and Vertex AI generative APIs. Allow: / ensures
# full Ensora content is eligible for Google AI surfaces.
# Disallowing Google-Extended does NOT affect rankings.
#
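# GoogleOther is Google's generic crawler for non-Search
# fetching (e.g. R&D). NOTE(review): Google documents
# "GoogleOther" as its only robots.txt token, so it is matched
# by its own group below rather than by the Googlebot group —
# confirm against Google's crawler documentation.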
# ==============================================================

# Googlebot: WordPress internals and low-value archive paths are
# excluded; asset directories are explicitly left crawlable.
# No rule is a prefix of another, so rule order is not significant.
User-agent: Googlebot
# -- WordPress admin, login, and API endpoints --
Disallow: /wp-login.php
Disallow: /wp-admin/
Disallow: /wp-json/
Disallow: /xmlrpc.php
# -- Search results, feeds, and archive-style paths --
Disallow: /?s=
Disallow: /author/
Disallow: /feed/
Disallow: /trackback/
Disallow: /_builder/
# -- Core, theme, and plugin assets --
Allow: /wp-includes/
Allow: /wp-content/

# GoogleOther: mirrors the Googlebot exclusions. A crawler that
# matches its own group ignores the "*" group entirely, so every
# path that should be excluded has to be repeated here.
# /trackback/ and /author/ were previously missing, which left
# both paths crawlable by GoogleOther.
User-agent: GoogleOther
Allow: /wp-content/
Allow: /wp-includes/
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /xmlrpc.php
Disallow: /wp-json/
Disallow: /?s=
Disallow: /feed/
Disallow: /trackback/
Disallow: /author/
Disallow: /_builder/

# Google-Extended is a robots.txt control token only — it is not
# a crawling UA and sends no requests of its own.
# "Allow: /" permits content crawled from any path to be used
# in Gemini and Vertex AI.
User-agent: Google-Extended
Allow: /


# ==============================================================
# SECTION 3: MICROSOFT BING / COPILOT
#
# bingbot powers the Bing Search index and Copilot grounding.
# Microsoft does not use a separate crawler for Copilot —
# same infrastructure as Bing Search.
# ==============================================================

# bingbot: same exclusion set as the other search crawlers;
# asset directories are explicitly left crawlable.
# No rule is a prefix of another, so rule order is not significant.
User-agent: bingbot
# -- WordPress admin, login, and API endpoints --
Disallow: /wp-login.php
Disallow: /wp-admin/
Disallow: /wp-json/
Disallow: /xmlrpc.php
# -- Search results, feeds, and archive-style paths --
Disallow: /?s=
Disallow: /author/
Disallow: /feed/
Disallow: /trackback/
Disallow: /_builder/
# -- Core, theme, and plugin assets --
Allow: /wp-includes/
Allow: /wp-content/


# ==============================================================
# SECTION 4: OPENAI / CHATGPT
#
# GPTBot: training crawler. Allowing it makes content
# available for future ChatGPT model knowledge updates.
#
# OAI-SearchBot: builds the ChatGPT Search index. Primary
# agent for ChatGPT citation and GEO optimisation.
# NOT used for model training.
#
# ChatGPT-User: live page fetch triggered by a real user
# query. High-demand signal — equivalent to a high-intent
# impression. NOT used for training.
# ==============================================================

# GPTBot: allowed site-wide except for WordPress internals and
# low-value archive paths. No Allow lines are needed because no
# rule here blocks the asset directories.
User-agent: GPTBot
# -- WordPress admin, login, and API endpoints --
Disallow: /wp-login.php
Disallow: /wp-admin/
Disallow: /wp-json/
Disallow: /xmlrpc.php
# -- Search results, feeds, and archive-style paths --
Disallow: /?s=
Disallow: /author/
Disallow: /feed/
Disallow: /trackback/
Disallow: /_builder/

# OAI-SearchBot: allowed site-wide except for WordPress internals
# and low-value archive paths (same set as GPTBot).
User-agent: OAI-SearchBot
# -- WordPress admin, login, and API endpoints --
Disallow: /wp-login.php
Disallow: /wp-admin/
Disallow: /wp-json/
Disallow: /xmlrpc.php
# -- Search results, feeds, and archive-style paths --
Disallow: /?s=
Disallow: /author/
Disallow: /feed/
Disallow: /trackback/
Disallow: /_builder/

# ChatGPT-User: only the WordPress admin area and login page are
# excluded; every other path remains fetchable.
User-agent: ChatGPT-User
Disallow: /wp-admin/
Disallow: /wp-login.php


# ==============================================================
# SECTION 5: ANTHROPIC / CLAUDE
#
# ClaudeBot: training crawler. Allowing it makes Ensora content
# available to future Claude model versions.
#
# Claude-SearchBot: crawls the web to improve the quality of
# search results shown to Claude users. Documented by Anthropic
# alongside ClaudeBot and Claude-User.
#
# Claude-User: live fetch triggered by real user queries.
# High-demand signal.
#
# NOTE: Anthropic does not publish IP ranges. UA-string
# matching is the only supported control mechanism for
# all Anthropic crawlers.
# ==============================================================

User-agent: ClaudeBot
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /xmlrpc.php
Disallow: /wp-json/
Disallow: /?s=
Disallow: /feed/
Disallow: /trackback/
Disallow: /author/
Disallow: /_builder/

# Index crawler: carries the same exclusion set as the other
# index/training crawlers in this file. A crawler that matches
# its own group ignores the "*" group, so the low-value paths
# must be listed here explicitly.
User-agent: Claude-SearchBot
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /xmlrpc.php
Disallow: /wp-json/
Disallow: /?s=
Disallow: /feed/
Disallow: /trackback/
Disallow: /author/
Disallow: /_builder/

# User-triggered fetcher: only the admin area and login page
# are excluded.
User-agent: Claude-User
Disallow: /wp-admin/
Disallow: /wp-login.php


# ==============================================================
# SECTION 6: PERPLEXITY
#
# PerplexityBot: builds the Perplexity answer index.
# NOT used for model training.
#
# Perplexity-User: live fetch triggered by a user query.
# High-demand signal — content surfaced directly in answers.
#
# NOTE: Documented instances of Perplexity operating outside
# published IP ranges using undisclosed UAs. UA-string
# matching is more reliable than CIDR filtering for this
# provider.
# ==============================================================

# PerplexityBot: allowed site-wide except for WordPress internals
# and low-value archive paths.
User-agent: PerplexityBot
# -- WordPress admin, login, and API endpoints --
Disallow: /wp-login.php
Disallow: /wp-admin/
Disallow: /wp-json/
Disallow: /xmlrpc.php
# -- Search results, feeds, and archive-style paths --
Disallow: /?s=
Disallow: /author/
Disallow: /feed/
Disallow: /trackback/
Disallow: /_builder/

# Perplexity-User: only the WordPress admin area and login page
# are excluded; every other path remains fetchable.
User-agent: Perplexity-User
Disallow: /wp-admin/
Disallow: /wp-login.php


# ==============================================================
# SECTION 7: APPLE
#
# Applebot: powers Siri, Spotlight Search, and Safari
# Suggestions. Follows Googlebot rules if Applebot is not
# separately specified — this block overrides that behaviour.
#
# Applebot-Extended is a robots.txt directive only — it is
# NOT a crawling UA and will never appear in access logs.
# Controls whether Applebot-crawled content trains Apple
# Intelligence foundation models. Allow: / ensures full
# Ensora content is eligible for Apple AI features.
# ==============================================================

# Applebot: explicit group with the same exclusion set as the
# other search crawlers; asset directories are left crawlable.
# No rule is a prefix of another, so rule order is not significant.
User-agent: Applebot
# -- WordPress admin, login, and API endpoints --
Disallow: /wp-login.php
Disallow: /wp-admin/
Disallow: /wp-json/
Disallow: /xmlrpc.php
# -- Search results, feeds, and archive-style paths --
Disallow: /?s=
Disallow: /author/
Disallow: /feed/
Disallow: /trackback/
Disallow: /_builder/
# -- Core, theme, and plugin assets --
Allow: /wp-includes/
Allow: /wp-content/

# Applebot-Extended is a robots.txt control token only — it is
# not a crawling UA and sends no requests of its own.
# "Allow: /" permits content crawled from any path to be used
# in Apple Intelligence.
User-agent: Applebot-Extended
Allow: /


# ==============================================================
# SECTION 8: DUCKDUCKGO
#
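# DuckDuckBot covers the DuckDuckGo organic search index.
# DuckAssist (DuckDuckGo AI) also draws on the Bing index —
# bingbot allowlisting in Section 3 covers that surface.
# NOTE(review): DuckDuckGo also documents a separate
# DuckAssistBot fetcher for AI-assisted answers. With no group
# of its own here it is governed by the Section 11 fallback —
# confirm whether it needs an explicit group.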
# ==============================================================

# DuckDuckBot: allowed site-wide except for WordPress internals
# and low-value archive paths.
User-agent: DuckDuckBot
# -- WordPress admin, login, and API endpoints --
Disallow: /wp-login.php
Disallow: /wp-admin/
Disallow: /wp-json/
Disallow: /xmlrpc.php
# -- Search results, feeds, and archive-style paths --
Disallow: /?s=
Disallow: /author/
Disallow: /feed/
Disallow: /trackback/
Disallow: /_builder/


# ==============================================================
# SECTION 9: AMAZON / ALEXA / RUFUS
#
# Amazonbot: general Amazon crawling; may train Amazon AI.
#
# Amzn-SearchBot: search crawler for Amazon product surfaces
# such as Alexa and Rufus. Listed in Amazon's published crawler
# documentation (developer.amazon.com/amazonbot) alongside
# Amazonbot and Amzn-User — re-confirm the token at deploy time.
#
# Amzn-User: live fetch triggered by Alexa queries and
# similar Amazon product actions.
# ==============================================================

# Crawler: carries the same exclusion set as the other
# index/training crawlers in this file. A crawler that matches
# its own group ignores the "*" group, so the low-value paths
# must be listed here explicitly.
User-agent: Amazonbot
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /xmlrpc.php
Disallow: /wp-json/
Disallow: /?s=
Disallow: /feed/
Disallow: /trackback/
Disallow: /author/
Disallow: /_builder/

# Search crawler: same exclusion set as Amazonbot.
User-agent: Amzn-SearchBot
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /xmlrpc.php
Disallow: /wp-json/
Disallow: /?s=
Disallow: /feed/
Disallow: /trackback/
Disallow: /author/
Disallow: /_builder/

# User-triggered fetcher: only the admin area and login page
# are excluded.
User-agent: Amzn-User
Disallow: /wp-admin/
Disallow: /wp-login.php


# ==============================================================
# SECTION 10: YANDEX
# ==============================================================

# YandexBot: allowed site-wide except for WordPress internals and
# low-value archive paths. No rule is a prefix of another, so
# rule order is not significant.
User-agent: YandexBot
# -- WordPress admin, login, and API endpoints --
Disallow: /wp-login.php
Disallow: /wp-admin/
Disallow: /wp-json/
Disallow: /xmlrpc.php
# -- Search results, feeds, and archive-style paths --
Disallow: /?s=
Disallow: /author/
Disallow: /feed/
Disallow: /trackback/
Disallow: /_builder/


# ==============================================================
# SECTION 11: GLOBAL FALLBACK
#
# Applies to all crawlers not explicitly addressed above.
# Blocks standard WordPress low-value paths to preserve
# crawl budget for high-value content.
# Explicitly allows asset directories so crawlers can
# accurately render page context.
#
# DEVELOPER HANDOFF — ADD ADDITIONAL DISALLOW RULES HERE:
# Common WordPress additions to consider:
#   Disallow: /cart/
#   Disallow: /checkout/
#   Disallow: /my-account/
#   Disallow: /thank-you/
#   Disallow: /staging/
#   Disallow: /*?*        (blocks all query string URLs)
# Add any Ensora-specific low-value paths in this section.
#
# DEV CONFIRM REQUIRED:
#   Disallow: /_builder/              — reinstate if page builder path is active on live site
#   Allow: /wp-admin/admin-ajax.php   — reinstate if front-end functionality depends on it
# ==============================================================

# Fallback group: applies to any crawler that has no group of
# its own in this file. No rule is a prefix of another, so rule
# order is not significant.
User-agent: *
# -- WordPress admin, login, and API endpoints --
Disallow: /wp-login.php
Disallow: /wp-admin/
Disallow: /wp-json/
Disallow: /xmlrpc.php
# -- Search results, feeds, and archive-style paths --
Disallow: /?s=
Disallow: /author/
Disallow: /feed/
Disallow: /trackback/
Disallow: /_builder/
# -- Core, theme, and plugin assets --
Allow: /wp-includes/
Allow: /wp-content/


# ==============================================================
# SITEMAP
# ==============================================================

# Sitemap index location. This line is independent of the
# User-agent groups above and is given as an absolute URL.
Sitemap: https://ensorahealth.com/sitemap_index.xml