# robots.txt — crawler access policy for fluidics-equipment.com
#
# How groups are evaluated (RFC 9309):
#   * A crawler obeys ONLY the single most specific group whose User-agent
#     token matches it; it does not additionally inherit the "*" group.
#   * User-agent tokens are matched case-insensitively.
#   * Several User-agent lines may share one rule set by being listed
#     consecutively in front of the rules.
# Blank lines are avoided inside a group because some older parsers treat a
# blank line as the end of the group; a bare "#" is used as a visual separator.

# ────────────────────────────────────────────────────────
# BLOCKED CRAWLERS — NO ACCESS
# ────────────────────────────────────────────────────────
# meta-externalagent was previously listed both as allowed and as blocked
# (and a second time with different capitalisation). Conflicting groups for
# the same token are merged and the least restrictive rule wins, so the block
# had no effect. It is listed once here, as blocked, consistent with the
# other Meta/Facebook crawlers.
# NOTE(review): confirm that blocking meta-externalagent is the intended policy.
User-agent: Bytespider
User-agent: meta-externalagent
User-agent: Meta-ExternalFetcher
User-agent: FacebookBot
Disallow: /

# ────────────────────────────────────────────────────────
# COMMON CRAWL — DISALLOWED (primary LLM training source)
# ────────────────────────────────────────────────────────
User-agent: CCBot
Disallow: /

# ────────────────────────────────────────────────────────
# ALLOWED CRAWLERS AND ALL OTHER CRAWLERS — SHARED DEFAULT RULES
# ────────────────────────────────────────────────────────
# The named crawlers below are welcome to index the site. They are listed on
# the same group as "*" on purpose: giving each of them its own "Allow: /"
# group would exempt them from every Disallow rule below (admin, checkout,
# faceted navigation), which defeats the crawl-budget protection.
#
# Search engine crawlers
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
#
# AI search crawlers — allowed.
# These bots power AI Overviews, ChatGPT, Claude, Perplexity etc.
# Training opt-out is handled by the X-Robots-Tag header, not here.
# NOTE(review): some of these tokens (e.g. Google-Extended, GPTBot, ClaudeBot,
# Applebot-Extended) are described by their vendors as controls for model
# training / AI use rather than retrieval-only crawlers. Allowing them here
# may permit that use regardless of X-Robots-Tag — confirm against each
# vendor's current crawler documentation.
User-agent: Google-Extended
User-agent: GPTBot
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Amazonbot
User-agent: Applebot
User-agent: Applebot-Extended
#
# Every crawler not named anywhere in this file
User-agent: *
#
# Allow llms.txt and AI discovery files
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /sitemap.xml
Allow: /media/sitemap/
#
# Block Magento admin and private areas
Disallow: /admin/
Disallow: /adminhtml/
Disallow: /customer/
Disallow: /checkout/
Disallow: /cart/
Disallow: /wishlist/
Disallow: /review/
Disallow: /compare/
Disallow: /tag/
Disallow: /catalogsearch/
Disallow: /search/
#
# Block Magento system paths
# NOTE(review): if storefront CSS/JS/images are served from URLs under /pub/
# (e.g. /pub/static/, /pub/media/), the /pub/ rule hides them from crawlers
# that render pages — confirm how assets are exposed on this host.
Disallow: /cgi-bin/
Disallow: /var/
Disallow: /pub/
Disallow: /app/
Disallow: /lib/
Disallow: /setup/
Disallow: /update/
Disallow: /downloader/
Disallow: /errors/
Disallow: /shell/
Disallow: /report/
#
# Block faceted navigation (crawl budget protection)
# "/*?limit=" is a prefix match and therefore already covers "?limit=all".
# These patterns match a filter only when it is the FIRST query parameter
# ("?name="); "/*&p=" matches pagination only when it is NOT the first.
Disallow: /*?dir=
Disallow: /*?limit=
Disallow: /*?mode=
Disallow: /*?price=
Disallow: /*?color=
Disallow: /*?size=
Disallow: /*?manufacturer=
Disallow: /*&p=
#
# Block duplicate/internal Magento view URLs
# (only if your canonical URLs are clean SEO-friendly paths like /en/product-name.html)
Disallow: /catalog/product/view/
Disallow: /catalog/category/view/
Disallow: /index.php/

# ────────────────────────────────────────────────────────
# SITEMAPS
# ────────────────────────────────────────────────────────
# Sitemap lines are independent of any User-agent group.
Sitemap: https://fluidics-equipment.com/sitemap.xml
Sitemap: https://fluidics-equipment.com/media/sitemap/fluidics_sitemap_de.xml
Sitemap: https://fluidics-equipment.com/media/sitemap/fluidics_sitemap_en.xml
Sitemap: https://fluidics-equipment.com/media/sitemap/fluidics_sitemap_es.xml