# ============================================
# ECOMMERCE ROBOTS.TXT
# Optimized for AI crawlers & search engines
# ============================================
#
# How this file is organized
# --------------------------
# * robots.txt is line-oriented: every directive sits on its own line.
# * A crawler obeys ONLY the single group whose User-agent token matches it
#   most specifically; it never combines that group with "User-agent: *".
#   A bot that is named in a group containing nothing but "Allow: /" would
#   therefore skip every Disallow written under "*". To make the
#   restrictions really apply to all crawlers, every named bot is listed in a
#   group that carries the full rule list.
# * robots.txt has no include mechanism, so the rule list is repeated
#   verbatim in groups 1, 2 and 3 (they differ only by Crawl-delay).
#   Keep the three copies in sync when editing.
# * Inside a group only comment lines are used as separators, because some
#   parsers treat a blank line as the end of the group.
# * No leading "Allow: /" in the restricted groups: allow is the default,
#   and first-match parsers would let it override every Disallow after it.
#
# NOTE(review): "/*.php$" and directories such as /sites/, /modules/,
# /scripts/, /templates/ and /includes/ are blocked for every crawler below.
# Confirm that no public page, stylesheet, script or image needed for
# rendering is served from those URLs before deploying.

# ========== GROUP 1: SEARCH ENGINES, CRAWL-DELAY 1 ==========
# Google documents that it ignores Crawl-delay; the value is kept for
# crawlers that do honor it.
User-agent: Googlebot
# Essential for product image SEO
User-agent: Googlebot-image
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: YandexBot
Crawl-delay: 1
# --- Admin and sensitive areas ---
Disallow: /admin/
Disallow: /administrator/
Disallow: /wp-admin/
Disallow: /checkout/
Disallow: /cart/
Disallow: /login/
Disallow: /register/
Disallow: /account/
Disallow: /customer/
Disallow: /payment/
Disallow: /orders/
Disallow: /order-history/
Disallow: /wishlist/
Disallow: /compare/
Disallow: /api/
Disallow: /private/
Disallow: /tmp/
Disallow: /temp/
Disallow: /cache/
# --- Duplicate and filtered URLs ---
Disallow: /search?*
Disallow: /*?sort=*
Disallow: /*?filter=*
Disallow: /*?page=*
Disallow: /*?limit=*
Disallow: /*?order=*
Disallow: /*?direction=*
Disallow: /*?view=*
Disallow: /*?mode=*
Disallow: /*?currency=*
Disallow: /*?lang=*
Disallow: /*/page/*
Disallow: /*/sort/*
Disallow: /*/filter/*
# --- Common system files ---
Disallow: /*.php$
Disallow: /*.inc$
Disallow: /*.class$
Disallow: /*.asp$
Disallow: /*.aspx$
Disallow: /*.cgi$
# --- Common system directories ---
Disallow: /cgi-bin/
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /scripts/
Disallow: /templates/
Disallow: /sites/
Disallow: /system/
Disallow: /logs/
Disallow: /config/
# --- Romanian storefront stays crawlable ---
Allow: /ro/
Allow: /ro/*

# ========== GROUP 2: SEARCH ENGINES, CRAWL-DELAY 2 ==========
User-agent: Slurp
User-agent: Baiduspider
Crawl-delay: 2
# --- Admin and sensitive areas ---
Disallow: /admin/
Disallow: /administrator/
Disallow: /wp-admin/
Disallow: /checkout/
Disallow: /cart/
Disallow: /login/
Disallow: /register/
Disallow: /account/
Disallow: /customer/
Disallow: /payment/
Disallow: /orders/
Disallow: /order-history/
Disallow: /wishlist/
Disallow: /compare/
Disallow: /api/
Disallow: /private/
Disallow: /tmp/
Disallow: /temp/
Disallow: /cache/
# --- Duplicate and filtered URLs ---
Disallow: /search?*
Disallow: /*?sort=*
Disallow: /*?filter=*
Disallow: /*?page=*
Disallow: /*?limit=*
Disallow: /*?order=*
Disallow: /*?direction=*
Disallow: /*?view=*
Disallow: /*?mode=*
Disallow: /*?currency=*
Disallow: /*?lang=*
Disallow: /*/page/*
Disallow: /*/sort/*
Disallow: /*/filter/*
# --- Common system files ---
Disallow: /*.php$
Disallow: /*.inc$
Disallow: /*.class$
Disallow: /*.asp$
Disallow: /*.aspx$
Disallow: /*.cgi$
# --- Common system directories ---
Disallow: /cgi-bin/
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /scripts/
Disallow: /templates/
Disallow: /sites/
Disallow: /system/
Disallow: /logs/
Disallow: /config/
# --- Romanian storefront stays crawlable ---
Allow: /ro/
Allow: /ro/*

# ========== GROUP 3: ALL OTHER CRAWLERS (NO CRAWL-DELAY) ==========
# "*" is the fallback for every crawler not named anywhere in this file.
# The named bots below get exactly the same rules; they are listed so the
# intent to welcome them is explicit.
User-agent: *
# --- Other search engine crawlers ---
User-agent: Googlebot-news
User-agent: Googlebot-video
User-agent: NaverBot
User-agent: Sogou
# Czech search engine (relevant for Central Europe)
User-agent: SeznamBot
# --- AI training & research crawlers ---
# OpenAI: search crawler, ChatGPT web browsing, training crawler, and
# alternative/generic identifiers
User-agent: OpenAI-SearchBot
User-agent: ChatGPT-User
User-agent: GPTBot
User-agent: ChatGPTBot
User-agent: OpenAI
# Google AI training (Bard/Gemini)
User-agent: Google-Extended
# Anthropic / Claude identifiers
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: anthropic
User-agent: ClaudeBot
# Perplexity AI search
User-agent: PerplexityBot
# You.com AI search
User-agent: YouBot
# Common Crawl (used by many AI companies)
User-agent: CCBot
# ByteDance/TikTok
User-agent: Bytespider
# AI image analysis
User-agent: ImagesiftBot
# AI-powered web scraping
User-agent: Diffbot
# Allen Institute for AI
User-agent: AI2Bot
# Meta
User-agent: FacebookBot
User-agent: MetaBot
# --- Social media crawlers (link previews, social commerce) ---
User-agent: Twitterbot
User-agent: facebookexternalhit
User-agent: LinkedInBot
User-agent: WhatsApp
User-agent: TelegramBot
User-agent: SkypeUriPreview
# VKontakte (popular in Eastern Europe)
User-agent: vkShare
# Pinterest shopping integration
User-agent: PinterestBot
User-agent: redditbot
User-agent: Slackbot
User-agent: DiscordBot
# --- Ecommerce & shopping crawlers ---
User-agent: Shopify-Partner
User-agent: ShopifyPartner
User-agent: PriceGrabber
User-agent: Shoppingcom
User-agent: BizRate
User-agent: Nextag
User-agent: Shopping
User-agent: PriceRunner
User-agent: Kelkoo
User-agent: Twenga
User-agent: Shopzilla
# --- Archive & SEO analysis crawlers ---
# Internet Archive / Wayback Machine
User-agent: ia_archiver
User-agent: Wayback
User-agent: archive.org_bot
# SEO analysis: Semrush, Ahrefs, Majestic, Moz
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
# --- Specialized AI identifiers ---
# NOTE(review): these look like model/product names rather than published
# crawler tokens; they are harmless but probably match no real crawler.
User-agent: Claude-4
User-agent: Claude-3
User-agent: Claude-2
User-agent: GPT-4
User-agent: GPT-3
User-agent: PaLM
User-agent: LaMDA
User-agent: Copilot
User-agent: ChatSonic
User-agent: Jasper
User-agent: Copy.ai
# --- Romanian search engines ---
User-agent: RomanianBot
User-agent: RoBot
# --- Admin and sensitive areas ---
Disallow: /admin/
Disallow: /administrator/
Disallow: /wp-admin/
Disallow: /checkout/
Disallow: /cart/
Disallow: /login/
Disallow: /register/
Disallow: /account/
Disallow: /customer/
Disallow: /payment/
Disallow: /orders/
Disallow: /order-history/
Disallow: /wishlist/
Disallow: /compare/
Disallow: /api/
Disallow: /private/
Disallow: /tmp/
Disallow: /temp/
Disallow: /cache/
# --- Duplicate and filtered URLs ---
Disallow: /search?*
Disallow: /*?sort=*
Disallow: /*?filter=*
Disallow: /*?page=*
Disallow: /*?limit=*
Disallow: /*?order=*
Disallow: /*?direction=*
Disallow: /*?view=*
Disallow: /*?mode=*
Disallow: /*?currency=*
Disallow: /*?lang=*
Disallow: /*/page/*
Disallow: /*/sort/*
Disallow: /*/filter/*
# --- Common system files ---
Disallow: /*.php$
Disallow: /*.inc$
Disallow: /*.class$
Disallow: /*.asp$
Disallow: /*.aspx$
Disallow: /*.cgi$
# --- Common system directories ---
Disallow: /cgi-bin/
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /scripts/
Disallow: /templates/
Disallow: /sites/
Disallow: /system/
Disallow: /logs/
Disallow: /config/
# --- Romanian storefront stays crawlable ---
Allow: /ro/
Allow: /ro/*

# ========== GROUP 4: GOOGLE ADS CRAWLERS (ESSENTIAL FOR ADVERTISING) ==========
# Left unrestricted on purpose: landing page verification for Google Ads,
# mobile ads and Shopping campaigns, AdSense, and mobile app ads.
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: Mediapartners-Google
User-agent: AdsBot-Google-Mobile-Apps
Allow: /

# ========== GROUP 5: BLOCKED CRAWLERS ==========
User-agent: SiteAuditBot
User-agent: spbot
User-agent: MegaIndex
User-agent: BLEXBot
User-agent: SemrushBot-SA
Disallow: /

# ========== SITEMAPS ==========
# Multiple sitemaps for better organization
Sitemap: https://www.pirosbolt.hu/sitemap.xml
Sitemap: https://www.pirosbolt.hu/sitemap-products.xml
Sitemap: https://www.pirosbolt.hu/sitemap-categories.xml
Sitemap: https://www.pirosbolt.hu/sitemap-images.xml
Sitemap: https://www.pirosbolt.hu/sitemap-news.xml

# ========== CRAWL DELAYS ==========
# Crawl delays are set in groups 1 and 2 above for specific bots.
# The request rate of all other crawlers is managed by the server.