# =============================================================================
# canna-pet.com — robots.txt
# Updated: 2026-04-13
# Comprehensive crawl directives with AI crawler management
#
# Reading this file:
#   - One directive per line. Everything after a "#" on a line is a comment,
#     so directives must never share a line with a comment banner.
#   - A crawler obeys only the most specific User-agent group that names it;
#     the "*" group applies to crawlers with no group of their own.
#   - Within a group, the matching rule with the longest path wins, and
#     Allow wins a tie. Keep this in mind before adding Allow rules.
# =============================================================================

# ─── Default: All crawlers ───────────────────────────────────────────────────
User-agent: *
Allow: /

# WordPress admin & internals
# admin-ajax.php stays reachable because its Allow path is longer than
# the /wp-admin/ Disallow.
# NOTE(review): /wp-includes/ and /wp-content/plugins/ may hold CSS/JS that
# public pages load — confirm rendering crawlers do not need those assets.
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/cache/
Disallow: /wp-json/

# WooCommerce — defensive (checkout happens on blackdahlia.co,
# but block these in case WC generates the URLs)
Disallow: /checkout/
Disallow: /cart/
Disallow: /my-account/
Disallow: /order-received/
Disallow: /add-to-cart/
Disallow: /*add-to-cart=*
Disallow: /*add_to_wishlist=*

# Search & filtered views (duplicate/thin content)
Disallow: /?s=
Disallow: /*?s=*
Disallow: /*?orderby=*
Disallow: /*?filter*
Disallow: /*?product_cat=*

# Paginated archives deep pages
Disallow: /page/

# Feed URLs
Disallow: /feed/
Disallow: /comments/feed/
Disallow: /*/feed/

# Author archives (thin pages)
Disallow: /author/

# WordPress login / registration
Disallow: /wp-login.php
Disallow: /wp-register.php

# Allow LLMs.txt endpoints
# "Allow: /llms" is a prefix match, so it also covers every other path that
# begins with /llms; the more specific entries are kept for readability.
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /llms
Allow: /llms-full

# Key content paths (/product/, /products/, /breed/, /faq/, /blog/,
# /health-concerns/) are crawlable through "Allow: /" above and are
# intentionally NOT listed as explicit Allow rules. An explicit rule such as
# "Allow: /blog/" is as long as or longer than wildcard rules like "/*?s=*"
# or "/*/feed/", so it would win precedence and re-open search, filter and
# feed URLs under those paths.

# ─── AI Crawlers — Block training scraping ───────────────────────────────────
# These crawlers scrape for LLM training data. Block them from bulk crawling
# while still serving /llms.txt and /llms-full.txt for structured discovery.
# One shared group for every AI crawler token: consecutive User-agent lines
# followed by rules form a single group, so all nine tokens get exactly the
# same rule set and it cannot drift between bots.
# Only paths beginning with /llms (the llms.txt discovery endpoints) are
# allowed; the Allow paths are longer than "Disallow: /", so they take
# precedence over it. Everything else is blocked.
User-agent: ClaudeBot
User-agent: GPTBot
User-agent: PerplexityBot
User-agent: Google-Extended
User-agent: meta-externalagent
User-agent: Applebot-Extended
User-agent: CCBot
User-agent: anthropic-ai
User-agent: cohere-ai
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /llms
Allow: /llms-full
Disallow: /

# ─── Sitemap ─────────────────────────────────────────────────────────────────
# Sitemap is independent of User-agent groups and applies to all crawlers.
Sitemap: https://canna-pet.com/sitemap_index.xml