# ============================================================
# robots.txt
# Last updated: April 2026
#
# Format notes:
#   - One directive per line; a "#" starts a comment that runs to
#     the end of that line.
#   - Rules are organised in groups, each introduced by one or more
#     "User-agent:" lines. A crawler obeys the group that matches
#     its own token most specifically and falls back to "*" only
#     when no named group matches, so the order of groups in this
#     file does not matter.
# ============================================================

# --------------------------------------------------
# Search engine crawlers — explicitly allowed
# --------------------------------------------------
User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: Slurp
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Baiduspider
Allow: /

User-agent: YandexBot
Allow: /

# --------------------------------------------------
# Default rule — allow all legitimate crawlers
# (an empty Disallow value blocks nothing)
# --------------------------------------------------
User-agent: *
Disallow:

# --------------------------------------------------
# AmazonProductDiscovery — existing rules preserved
# --------------------------------------------------
User-agent: AmazonProductDiscoverybot
Disallow: /blog/
Disallow: /legal/
Disallow: /blog-categories/
Disallow: /blog-authors/

# --------------------------------------------------
# AI training crawlers — blocked
# NOTE(review): some tokens in this section (e.g. Claude-User,
# PerplexityBot) may identify user-triggered or search fetchers
# rather than training crawlers — confirm that blocking them
# site-wide is intended.
# --------------------------------------------------

# OpenAI
User-agent: GPTBot
Disallow: /

# Anthropic
User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: Claude-User
Disallow: /

User-agent: anthropic-ai
Disallow: /

# Google AI training (does NOT affect Google Search or Merchant/Ads)
User-agent: Google-Extended
Disallow: /

# Common Crawl
User-agent: CCBot
Disallow: /

# ByteDance / TikTok
User-agent: Bytespider
Disallow: /

# Meta
User-agent: meta-externalagent
Disallow: /

# Apple AI training
User-agent: Applebot-Extended
Disallow: /

# Perplexity
User-agent: PerplexityBot
Disallow: /

# Cohere
User-agent: cohere-ai
Disallow: /

# Amazon Alexa AI
User-agent: Amazonbot
Disallow: /

# Misc AI crawlers
User-agent: AI2Bot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: YandexGPT
Disallow: /

User-agent: MistralBot
Disallow: /

User-agent: youBot
Disallow: /

User-agent: NeevaAI
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: SBIntuitionsBot
Disallow: /

# --------------------------------------------------
# Sitemap location (applies independently of any group)
# --------------------------------------------------
Sitemap: https://www.happyhead.com/sitemap.xml