#############################
# Core: keep admin private
#############################
# Default group: applies to every crawler that has no more specific
# User-agent group further down. Sections inside this group are separated
# by comment lines rather than blank lines, because some older parsers
# treat a blank line as the end of the group.
User-agent: *
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-register.php
# Longer (more specific) match wins, so this Allow overrides /wp-admin/.
Allow: /wp-admin/admin-ajax.php
#
#############################
# Reduce crawl waste on thin/duplicate pages
#############################
# WordPress search & preview
Disallow: /search/
Disallow: /?s=
Disallow: /*?*preview=true
# Tag & author archives (keep categories indexable if you use them for SEO)
Disallow: /tag/
Disallow: /author/
# Feeds & trackbacks
Disallow: /*/feed/
Disallow: /*/trackback/
# Tracking/query params (don't crawl duplicate URLs with campaign params).
# "utm_" is a prefix match so it covers utm_source=, utm_medium=, etc.
Disallow: /*?*utm_
Disallow: /*?*gclid=
Disallow: /*?*fbclid=
Disallow: /*?*mc_cid=
Disallow: /*?*mc_eid=
Disallow: /*?*replytocom=
#
#############################
# Ensure CSS/JS/media are crawlable
#############################
Allow: /*.css$
Allow: /*.js$
Allow: /wp-content/uploads/
#
#############################
# Forms uploads (keep blocked as today)
#############################
# Kept inside the single "User-agent: *" group instead of a second one,
# so parsers that do not merge repeated groups still apply it.
# The longer path beats "Allow: /wp-content/uploads/" above.
# START WPFORMS BLOCK
Disallow: /wp-content/uploads/wpforms/
# END WPFORMS BLOCK

#############################
# LLM & generative engine friendliness
# (Explicitly allowed - default is allow, but we make it clear)
#############################
# A crawler that matches a named group ignores the "*" group entirely,
# so the privacy rules must be repeated here or these bots would be
# free to fetch admin pages and form uploads.
User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: CCBot
User-agent: PerplexityBot
User-agent: Google-Extended
User-agent: Applebot-Extended
Allow: /
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /wp-content/uploads/wpforms/
Allow: /wp-admin/admin-ajax.php

#############################
# Tame aggressive SEO crawlers without slowing search engines
#############################
# Crawl-delay is a non-standard directive; it is only honoured by
# crawlers that choose to support it. The privacy rules are repeated
# for the same reason as in the LLM group above.
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: PetalBot
Crawl-delay: 10
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /wp-content/uploads/wpforms/
Allow: /wp-admin/admin-ajax.php

#############################
# Sitemap
#############################
Sitemap: https://www.commbox.io/sitemap_index.xml