# robots.txt for www.hawaiilife.com
#
# Format notes for maintainers:
#   * Every directive must be on its own line. A line that starts with "#"
#     is a comment in its entirety, so directives appended to a comment line
#     are ignored by crawlers.
#   * A crawler obeys only the group(s) whose User-agent line matches it most
#     specifically; a crawler named explicitly does NOT also inherit the "*"
#     group. Path rules therefore have to live in the same group as every
#     User-agent they are meant to govern.
#   * No blank lines are used inside a group, because some older parsers
#     treat a blank line as the end of the group.

# ---------------------------------------------------------------------------
# Group 1: default for all agents, plus explicitly named AI / LLM / search
# crawlers. Naming them here keeps them explicitly allowed while making sure
# the Disallow rules below apply to them as well.
# ---------------------------------------------------------------------------
User-agent: *
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Google-Extended
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: facebookexternalhit
User-agent: Bytespider
User-agent: YouBot
User-agent: Amazonbot
User-agent: Applebot
Allow: /
#
# Allow structured data and multimedia for AI + SEO
Allow: /structured-data/
Allow: /images/
Allow: /videos/
#
# Disallow sensitive admin and backend tools
Disallow: /agent_tools/
Disallow: /admin/
#
# Block API endpoints with private data
Disallow: /api/v1/leads
Disallow: /api/v1/listings*
#
# Block non-SEO critical URL parameters
Disallow: /*?replytocom
Disallow: /*/sign_in?referrer
Disallow: /*/facebook?*referrer
Disallow: /*/google_oauth2?*referrer
Disallow: /*/posts?*query=
Disallow: /*?zoom=
Disallow: /*?referrer=
Disallow: /*?hl_regions=
Disallow: /*?page=
Disallow: /*?c=
Disallow: /*?_gl=
Disallow: /*?_gcl=
Disallow: /*?fbclid=
Disallow: /*?utm_source=
Disallow: /*?utm_medium=
Disallow: /*?utm_campaign=
Disallow: /*?utm_term=
Disallow: /*?utm_content=
Disallow: /*?fbc=
Disallow: /*?fbp=
#
# Block irrelevant technical directories
Disallow: /cgi-bin/
Disallow: /temp/
Disallow: /test/

# ---------------------------------------------------------------------------
# Group 2: crawlers that receive a crawl speed recommendation.
# The path rules are intentionally repeated from Group 1: these crawlers match
# this group by name and would otherwise skip the rules above. Keep the two
# rule lists in sync when editing.
# Google ignores crawl-delay; handled in Search Console.
# ---------------------------------------------------------------------------
User-agent: Bingbot
User-agent: SeekportBot
Crawl-delay: 2
Allow: /
#
# Allow structured data and multimedia for AI + SEO
Allow: /structured-data/
Allow: /images/
Allow: /videos/
#
# Disallow sensitive admin and backend tools
Disallow: /agent_tools/
Disallow: /admin/
#
# Block API endpoints with private data
Disallow: /api/v1/leads
Disallow: /api/v1/listings*
#
# Block non-SEO critical URL parameters
Disallow: /*?replytocom
Disallow: /*/sign_in?referrer
Disallow: /*/facebook?*referrer
Disallow: /*/google_oauth2?*referrer
Disallow: /*/posts?*query=
Disallow: /*?zoom=
Disallow: /*?referrer=
Disallow: /*?hl_regions=
Disallow: /*?page=
Disallow: /*?c=
Disallow: /*?_gl=
Disallow: /*?_gcl=
Disallow: /*?fbclid=
Disallow: /*?utm_source=
Disallow: /*?utm_medium=
Disallow: /*?utm_campaign=
Disallow: /*?utm_term=
Disallow: /*?utm_content=
Disallow: /*?fbc=
Disallow: /*?fbp=
#
# Block irrelevant technical directories
Disallow: /cgi-bin/
Disallow: /temp/
Disallow: /test/

# Sitemap location (applies to all crawlers; not part of any group)
Sitemap: https://www.hawaiilife.com/sitemap.xml.gz