# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

# Group for crawlers that match no named group: search use is signalled as
# permitted and AI training as not permitted (no ai-input signal is given, so
# that use is neither granted nor restricted here); every path may be crawled.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

# Each crawler named below is denied the entire site.
# NOTE(review): GPTBot, Google-Extended, meta-externalagent, Applebot-Extended
# and Bytespider also appear in "Allow: /" groups later in this file; confirm
# which policy is intended.
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# Optimized robots.txt for AI Bot Accessibility

# NOTE(review): the Cloudflare-managed section earlier in this file sets
# "Disallow: /" for GPTBot, Google-Extended, meta-externalagent,
# Applebot-Extended and Bytespider, while the groups below set "Allow: /" for
# the same crawlers. Parsers that follow RFC 9309 merge same-named groups and
# let Allow win the equal-length tie, but parsers that stop at the first
# matching group will treat these crawlers as blocked. Decide which policy is
# intended and remove the other one.

# Each named group below repeats the admin/private exclusions on purpose: a
# crawler that matches a group by name does not fall back to the
# "User-agent: *" group, so rules placed only there would not reach it.

# === HIGH PRIORITY AI BOTS (Recommended: Allow) ===
# GPTBot - OpenAI's crawler for ChatGPT training data.
# OAI-SearchBot - OpenAI's crawler for ChatGPT real-time search and live web browsing.
# PerplexityBot - Perplexity AI's real-time web crawler that provides current information for AI answers. Blocking prevents your site from appearing in Perplexity search results.
# Google-Extended - Google's crawler token for AI training data (Bard/Gemini). Separate from regular search indexing, so allowing or blocking it does not change Google Search visibility.
User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: PerplexityBot
User-agent: Google-Extended
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /wp-admin/
Disallow: /api/
Disallow: /.env
Disallow: /config/

# === TRAINING & DATA COLLECTION BOTS ===
# Allow these if you want your content used for AI model training
# facebookexternalhit / meta-externalagent - Meta's crawlers for link previews, content analysis, and Meta AI training. Used across Facebook, Instagram, WhatsApp, and Meta AI products.
# Applebot-Extended - Apple's dedicated AI training crawler for Apple Intelligence. Separate from regular Applebot to allow selective AI training control.
# Applebot - Apple's main crawler for Siri, Spotlight search, and general Apple services. Essential for Apple ecosystem discoverability.
# Bytespider - ByteDance's web crawler for TikTok and international AI products. Replaces older Bytedance user-agent with current Bytespider.
User-agent: facebookexternalhit
User-agent: meta-externalagent
User-agent: Applebot-Extended
User-agent: Applebot
User-agent: Bytespider
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /wp-admin/
Disallow: /api/
Disallow: /.env
Disallow: /config/

# === GENERAL OPTIMIZATIONS ===
# Sitemap helps AI bots discover your content efficiently.
# Only the www host's sitemap is listed; add a second Sitemap line if the apex
# domain serves its own sitemap.
Sitemap: https://www.joinamble.com/sitemap.xml

# === COMMON EXCLUSIONS ===
# Block admin and private areas for every crawler that has no named group above.
# robots.txt is advisory and publicly readable: these paths must still be
# protected by server-side access control.
User-agent: *
Disallow: /admin/
Disallow: /private/
Disallow: /wp-admin/
Disallow: /api/
Disallow: /.env
Disallow: /config/