# robots.txt for efexploreamerica.com
# Last updated: 2026-04-22
# Algolia-Crawler-Verif: 90E775AD7ADA5970
#
# Layout rules for maintainers:
#   - One directive per physical line. Anything after a "#" on a line is a
#     comment, so a directive that shares a line with a comment is ignored.
#   - Do not put blank lines inside a group (a User-agent line plus its
#     Allow/Disallow rules). Some older parsers treat a blank line as the end
#     of the group; use a bare "#" line as a visual separator instead.
#   - When several rules match a URL, the most specific (longest) rule wins.

# =============================================================================
# DEFAULT RULES (search engines, standard crawlers)
# =============================================================================
User-agent: *
Allow: /
Allow: /educational-tours/search
Allow: /llms.txt
#
# Internal tools and admin
Disallow: /secure/
Disallow: /sitecore/
Disallow: /api/
Disallow: /forms
#
# Account and personal
Disallow: /account/
#
# Preview and print (duplicate content)
Disallow: /preview-tour*
Disallow: /print-tour/
#
# Legacy paths (redirect to internal corp domain -- security risk)
Disallow: /ctt_pdf
Disallow: /eliterature/
#
# Pricing
Disallow: /educational-tour/*/detailed-price
#
# Error pages
Disallow: /error/
#
# Inactive paths
Disallow: /tour-website
#
# Review pagination and photos (duplicate URL variations)
Disallow: /educational-tour/*/reviews/photos
Disallow: /educational-tour/*/reviews/more
#
# Help center search (dynamic params, thin content)
Disallow: /help-center/search-results
#
# User-specific pages
Disallow: /invite/
Disallow: /referral/
Disallow: /accountrecovery
#
# Legacy duplicate
Disallow: /Default.aspx
#
# Search with query parameters (thin/duplicate content).
# More specific than the "Allow: /educational-tours/search" rule above, so the
# bare search page stays crawlable while ?query= result pages are excluded.
Disallow: /educational-tours/search?query=
#
# Sensitive query parameters
Disallow: /*?*reviewId=
Disallow: /*?*accountnumber=
Disallow: /*?*individualid=
Disallow: /*?*__RequestVerificationToken=
#
# UTM and tracking parameters (prevent duplicate indexing).
# Note: the patterns match on the trailing text of a parameter name, so
# "source=" and "url=" also match any longer name ending in those strings
# (for example "utm_source=" is already covered by "source=").
Disallow: /*?*utm_source=
Disallow: /*?*utm_medium=
Disallow: /*?*utm_campaign=
Disallow: /*?*utm_term=
Disallow: /*?*utm_content=
Disallow: /*?*utm_segment=
Disallow: /*?*sourcecode=
Disallow: /*?*utm_group=
Disallow: /*?*utm_season=
Disallow: /*?*url=
Disallow: /*?*source=

# =============================================================================
# AI TRAINING CRAWLERS (blocked)
#
# These crawlers scrape content for AI model training, not search.
# Blocking them protects content from unauthorized training use while
# keeping the site visible in AI-powered search results.
#
# A crawler that matches one of these named groups follows only that group;
# the default rules above no longer apply to it.
# =============================================================================
User-agent: CCBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: YouBot
Disallow: /

# =============================================================================
# AI SEARCH CRAWLERS (allowed)
#
# GPTBot (ChatGPT search), PerplexityBot (Perplexity search), Bingbot
# (Copilot), and Googlebot (AI Overviews) are NOT blocked. They fall
# under User-agent: * and can crawl public content normally. This ensures
# efexploreamerica.com appears in AI-powered search results and gets
# cited in AI responses.
#
# NOTE(review): OpenAI's crawler documentation appears to describe GPTBot as
# its model-training crawler and OAI-SearchBot as the ChatGPT search crawler.
# If that is still accurate, leaving GPTBot unblocked contradicts the
# "AI TRAINING CRAWLERS (blocked)" policy above -- confirm and, if intended,
# add a "User-agent: GPTBot / Disallow: /" group to the blocked section.
#
# Content usage policy: see https://www.efexploreamerica.com/llms.txt
# =============================================================================

Sitemap: https://www.efexploreamerica.com/sitemap.xml