# robots.txt for WoodUpp.com (International)
# Updated: February 2026
#
# Notes for maintainers:
#   * One directive per line. Everything after "#" on a line is a comment.
#   * A crawler obeys only the most specific User-agent group that matches it.
#     A bot that has its own group below does NOT inherit the "*" rules, so
#     every named group repeats the private-area Disallow lines it needs.
#   * Inside each group the specific rules come before the broad "Allow: /".
#     Longest-match parsers (RFC 9309) are unaffected by ordering; parsers
#     that stop at the first matching rule then reach the same result.
#   * Crawl-delay is a non-standard extension; some crawlers ignore it.

# Sitemap reference
Sitemap: https://woodupp.com/sitemap_index.xml

# ====================================
# GENERAL RULES FOR ALL CRAWLERS
# ====================================
# Kept as a single "*" group: some parsers honour only the first group that
# matches a user agent, so splitting these rules across two "*" groups could
# silently drop every Disallow below.

User-agent: *

# LLM-friendly content
# Reference to llms.txt for AI crawlers
# See https://llmstxt.org/ for more information
Allow: /llms.txt
Allow: /llms-full.txt

# WordPress admin and backend
# (the admin-ajax exception is listed before the directory block on purpose)
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-register.php
Allow: /wp-content/uploads/

# WooCommerce specific
Disallow: /cart/
Disallow: /checkout/
Disallow: /my-account/
Disallow: /customer-logout/
Disallow: /return-portal/
Disallow: /pause-subscription/

# Search and filters
Disallow: /*?s=
Disallow: /*?add-to-cart=
Disallow: /*?removed_item=
Disallow: /*&add-to-cart=
Disallow: /*?orderby=
Disallow: /*&orderby=

# Internal test/dev pages
Disallow: /test/
Disallow: /test-2/
Disallow: /datalayer-test/
Disallow: /form-test/
Disallow: /product-carousel/
Disallow: /visualizer-2-0/
Disallow: /trade-show-visualizer/

# Tracking and session parameters
Disallow: /*?utm_
Disallow: /*&utm_
Disallow: /*?ref=
Disallow: /*&ref=
Disallow: /*?fbclid=
Disallow: /*&fbclid=
Disallow: /*?gclid=
Disallow: /*&gclid=

# Feeds (optional - remove these lines if you want RSS indexed)
Disallow: /feed/
Disallow: /*/feed/
Disallow: /comments/feed/

# Tags and categories (uncomment if not used strategically)
# Disallow: /tag/
# Disallow: /category/

# Important public sections (listed last; see ordering note at top of file)
Allow: /shop/
Allow: /product/
Allow: /guides/
Allow: /customer-case/
Allow: /collection/
Allow: /

# ====================================
# SPECIFIC CRAWLER RULES
# ====================================
# Each group below fully replaces the "*" group for the bots it names.
# All allowed bots share the same private-area blocks for consistency.

# GPTBot (OpenAI/ChatGPT crawler)
User-agent: GPTBot
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# ChatGPT-User (fetches pages on behalf of ChatGPT users)
User-agent: ChatGPT-User
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# Google-Extended (Google's AI training)
# Replace the rules with "Disallow: /" if you do NOT want your content used
# for AI training
User-agent: Google-Extended
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# CCBot (Common Crawl - used for AI training)
# Replace the rules with "Disallow: /" if you do NOT want your content in
# Common Crawl
User-agent: CCBot
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# ClaudeBot (Anthropic's crawler)
# Claude-Web is retained alongside ClaudeBot so the previously listed token
# keeps matching.
User-agent: ClaudeBot
User-agent: Claude-Web
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# Perplexity AI
User-agent: PerplexityBot
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# Meta AI (Facebook/Instagram AI)
User-agent: FacebookBot
User-agent: meta-externalagent
Allow: /llms.txt
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# Applebot (Apple Intelligence, Siri, Spotlight)
User-agent: Applebot
User-agent: Applebot-Extended
Allow: /llms.txt
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# Bytespider (TikTok/ByteDance crawler)
# Often very aggressive - blocked entirely
User-agent: Bytespider
Crawl-delay: 10
Disallow: /

# Amazonbot
User-agent: Amazonbot
Allow: /llms.txt
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# Yandex (Russian search engine)
User-agent: YandexBot
Crawl-delay: 2
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# Baidu (Chinese search engine)
User-agent: Baiduspider
Crawl-delay: 2
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# SemrushBot (SEO tool)
User-agent: SemrushBot
Crawl-delay: 2
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# AhrefsBot (SEO tool)
User-agent: AhrefsBot
Crawl-delay: 2
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/
Allow: /

# ====================================
# BLOCK BAD BOTS
# ====================================

# Scrapers and bad bots - throttled, not fully blocked
# The explicit rule lines close this group. Without them, parsers that ignore
# Crawl-delay would merge these bots into the full-block group that follows.
# NOTE(review): if a full block was intended for these bots, replace the
# rules below with "Disallow: /".
User-agent: MJ12bot
User-agent: DotBot
User-agent: BLEXBot
User-agent: DataForSeoBot
Crawl-delay: 10
Disallow: /wp-admin/
Disallow: /cart/
Disallow: /checkout/

# Very aggressive or spam bots - full block
User-agent: SurveyBot
User-agent: ia_archiver
User-agent: archive.org_bot
Disallow: /