# ============================================================
# ROBOTS.TXT - WordPress Template
# ============================================================
#
# How crawlers read this file (Robots Exclusion Protocol, RFC 9309):
#   - One directive per line; everything after "#" on a line is a comment.
#   - A crawler obeys ONLY the most specific User-agent group that matches
#     it. The "*" group applies only to crawlers with no named group below.
#   - Inside a group, the longest matching path wins; on a tie, Allow wins.

# ----------------------------------------------------------
# 1. GENERAL RULES (all bots)
# ----------------------------------------------------------
User-agent: *
# No blank lines inside this group: some parsers treat a blank line as
# the end of the group. Each exception is listed BEFORE the broader rule
# it carves out of, so first-match and longest-match parsers agree.
#
# admin-ajax.php is used by front-end features and must stay reachable.
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /xmlrpc.php
Disallow: /readme.html
Disallow: /license.txt
# Keep CSS/JS crawlable for proper rendering: search engines need the
# stylesheets and scripts served from core, plugins and the cache
# directory to render pages, even though the rest of those directories
# is blocked.
Allow: /wp-includes/*.css
Allow: /wp-includes/*.js
Disallow: /wp-includes/
Allow: /wp-content/plugins/*.css
Allow: /wp-content/plugins/*.js
Disallow: /wp-content/plugins/
Allow: /wp-content/cache/*.css
Allow: /wp-content/cache/*.js
Disallow: /wp-content/cache/
Disallow: /wp-json/
Disallow: /feed/
Disallow: /?s=
Disallow: /search/
Disallow: /author/
Disallow: /tag/
Disallow: /*?replytocom=
Disallow: /*?doing_wp_cron
# Contact Form 7 upload directory: listed before the broader uploads
# Allow so it stays blocked for every parser.
Disallow: /wp-content/uploads/wpcf7_uploads/
# Allow images/media and theme assets for proper rendering
Allow: /wp-content/uploads/
Allow: /wp-content/themes/

# ----------------------------------------------------------
# 2. SEARCH ENGINES - Allowed (SEO)
# ----------------------------------------------------------
# Googlebot, Bingbot, Yandex, etc. follow the general
# rules above. No need to list them individually.

# ----------------------------------------------------------
# 3. SOCIAL / PREVIEW BOTS - Allowed
# ----------------------------------------------------------
# These bots generate previews when you share a link.
# If you block them, no preview cards on social and chat.
# Because each bot has its own group, it does NOT inherit the
# Disallow rules from section 1: "Allow: /" means the whole site.

# Facebook / Meta link previews are fetched by facebookexternalhit.
User-agent: facebookexternalhit
Allow: /

# NOTE(review): Meta documents FacebookBot as a crawler that collects
# data for its language/speech models, not as the link-preview fetcher.
# Kept allowed to preserve the existing policy - confirm whether it
# belongs with the blocked training bots instead.
User-agent: FacebookBot
Allow: /

User-agent: LinkedInBot
Allow: /

User-agent: Twitterbot
Allow: /

User-agent: TelegramBot
Allow: /

User-agent: Slackbot
Allow: /

User-agent: WhatsApp
Allow: /

User-agent: Discordbot
Allow: /

# ----------------------------------------------------------
# 4. AI BOTS - SEARCH/CITATION - Allowed
# ----------------------------------------------------------
# These bots fetch content in real time to answer user
# questions and cite sources.
# Blocking them = invisibility in AI answers.
User-agent: OAI-SearchBot
Allow: /

User-agent: ChatGPT-User
Allow: /

# Anthropic - user-initiated fetches and search indexing.
User-agent: Claude-User
Allow: /

User-agent: Claude-SearchBot
Allow: /

# Anthropic - legacy agent name, kept for older crawlers.
User-agent: claude-web
Allow: /

User-agent: PerplexityBot
Allow: /

User-agent: Perplexity-User
Allow: /

User-agent: DuckAssistBot
Allow: /

User-agent: Applebot
Allow: /

# ----------------------------------------------------------
# 5. AI BOTS - TRAINING - Blocked
# ----------------------------------------------------------
# These bots collect data to train AI models.
# They don't drive traffic, don't cite sources, they only
# use your content for training.

# OpenAI - training
User-agent: GPTBot
Disallow: /

# Google - AI training (Gemini, Vertex AI)
User-agent: Google-Extended
Disallow: /

# Anthropic - training. ClaudeBot is Anthropic's training-data crawler,
# so it is blocked here rather than allowed in section 4.
# anthropic-ai is the legacy agent name.
User-agent: ClaudeBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

# Apple - AI training
User-agent: Applebot-Extended
Disallow: /

# Meta - AI training
# User-agent matching is case-insensitive; both spellings are kept in a
# single group for parsers that compare the token literally.
User-agent: Meta-ExternalAgent
User-agent: meta-externalagent
Disallow: /

# ByteDance/TikTok - training (very aggressive crawling)
User-agent: Bytespider
Disallow: /

# Common Crawl - public dataset used for AI training
User-agent: CCBot
Disallow: /

# Cohere - training
User-agent: cohere-ai
Disallow: /

# Amazon - AI training
User-agent: Amazonbot
Disallow: /

# Diffbot - structured scraping for ML
User-agent: Diffbot
Disallow: /

# AI2 / Allen Institute - AI research
User-agent: AI2Bot
Disallow: /

# Omgili - forum/comment scraping for AI
User-agent: omgili
Disallow: /

# Timpi - decentralized search
User-agent: Timpibot
Disallow: /

# Webz.io - sells crawled data to AI companies
User-agent: webzio-extended
Disallow: /

# ImagesiftBot - image collection for training
User-agent: ImagesiftBot
Disallow: /

# iaskspider
User-agent: iaskspider
Disallow: /

# ----------------------------------------------------------
# 6. AGGRESSIVE SEO SCRAPERS - Blocked
# ----------------------------------------------------------
# Optional: uncomment ONLY if you DON'T use these tools.
# If you use Ahrefs, SEMrush, Moz, etc. for your clients,
# keep them commented out!

# User-agent: AhrefsBot
# Disallow: /

# User-agent: SemrushBot
# Disallow: /

# User-agent: MJ12bot
# Disallow: /

# User-agent: DotBot
# Disallow: /

# User-agent: BLEXBot
# Disallow: /

# User-agent: rogerbot
# Disallow: /

# User-agent: SISTRIX
# Disallow: /

# ----------------------------------------------------------
# 7. SITEMAP
# ----------------------------------------------------------
# Replace with the correct URL of your sitemap.
# If you use Rank Math, Yoast, or All in One SEO,
# the path may be different.
Sitemap: https://www.davidfeldman.com/sitemap_index.xml