# HouseCanary — robots.txt
# https://www.housecanary.com

# AI crawlers — explicitly welcomed.
# A crawler obeys only the most specific group that names it and ignores
# `User-agent: *`. A bare `Allow: /` here would therefore exempt these bots
# from every site-wide exclusion, so the same Disallow rules are repeated.
# Keep this list in sync with the `User-agent: *` group.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: PerplexityBot
User-agent: ClaudeBot
Allow: /
Disallow: /search/
Disallow: /search-results/
Disallow: /link-archive/
Disallow: /contact/thank-you/
Disallow: /contact/email-error/
Disallow: /contact/market-pulse-thank-you/
Disallow: /campaigns/

# Default group: applies to every crawler that is not named in its own
# User-agent group elsewhere in this file.
# Everything is crawlable except the Disallow paths below. Paths that are not
# listed are allowed by default, so the two Allow lines do not change the
# outcome today — presumably kept to state intent explicitly (confirm).
User-agent: *
Allow: /blog/
Allow: /images/
Disallow: /search/
Disallow: /search-results/
Disallow: /link-archive/
Disallow: /contact/thank-you/
Disallow: /contact/email-error/
Disallow: /contact/market-pulse-thank-you/
Disallow: /campaigns/

# Google-Extended is a control token, not a separate crawler: Google fetches
# pages with its regular user agents, and this group only governs whether
# that content may be used for Gemini (formerly Bard) model training and
# grounding. It does not affect Google Search indexing or ranking.
User-agent: Google-Extended
Allow: /

# Sitemap location. This line is independent of the User-agent groups above.
Sitemap: https://www.housecanary.com/sitemap.xml

# LLM-readable site summary (emerging convention).
# Informational only: the line below is a comment, not a robots.txt
# directive, so crawlers do not parse it.
# llms.txt: https://www.housecanary.com/llms.txt