# ===========================================================================
# Any.do AI Crawler Policy
# ---------------------------------------------------------------------------
# Policy: Any.do welcomes AI agents that help users discover and evaluate
# our product. We differentiate between search/answer agents (full access),
# training-only crawlers (throttled), and aggressive scrapers (blocked).
# Contact: support@any.do
# Last updated: 2026-04
# ===========================================================================

# Default rules
# Fallback group: it applies only to crawlers that are NOT named by a more
# specific User-agent group elsewhere in this file. A crawler that matches a
# named group uses that group instead of this one; the rules are not merged.
User-agent: *
Disallow: /get-anydo/
Disallow: /get-cal/
Disallow: /dynamic/
# Sitemap is a file-wide directive; it is not tied to the group above.
Sitemap: https://www.any.do/sitemap.xml
# NOTE(review): Schemamap is not part of RFC 9309, so parsers that do not
# know it will skip this line. It currently points to the same URL as Sitemap
# above — confirm this is the intended schema-map document and not a
# copy-paste of the sitemap URL.
Schemamap: https://www.any.do/sitemap.xml

# ===========================================================================
# Tier 1 — Search and answer agents (full access, no throttle)
# These agents surface Any.do in search results, chat answers, and
# recommendations. Full crawl access benefits users discovering our product.
# ===========================================================================

# NOTE: a crawler obeys only the most specific group that names it, so every
# agent listed in this tier skips the "User-agent: *" group entirely,
# including its Disallow rules for /get-anydo/, /get-cal/ and /dynamic/.
# NOTE(review): confirm that exposing those paths to Tier 1 is intended.

# OpenAI
# OAI-SearchBot indexes pages for ChatGPT search results; ChatGPT-User
# fetches pages on behalf of a user during a conversation.
User-agent: OAI-SearchBot
Allow: /

User-agent: ChatGPT-User
Allow: /

# GPTBot is OpenAI's model-training crawler, not its search crawler. It keeps
# full access here as before.
# NOTE(review): by this file's own policy it may belong with the throttled
# training crawlers — decide and move if so.
User-agent: GPTBot
Allow: /

# Anthropic — Claude
# Claude-SearchBot indexes pages for search answers; Claude-User fetches
# pages when a user asks Claude about them.
User-agent: Claude-SearchBot
Allow: /

User-agent: Claude-User
Allow: /

# ClaudeBot collects content that may be used for model training;
# anthropic-ai is a legacy token kept for older crawler versions.
User-agent: ClaudeBot
Allow: /

User-agent: anthropic-ai
Allow: /

# Google
# Googlebot feeds Google Search, which is also what AI Overviews draw on.
User-agent: Googlebot
Allow: /

# Google-Extended is a control token, not a separate crawler: it governs
# whether crawled content may be used for Gemini training and grounding.
User-agent: Google-Extended
Allow: /

# Perplexity — AI search engine
# PerplexityBot builds the search index; Perplexity-User fetches pages in
# response to a user's question.
User-agent: PerplexityBot
Allow: /

User-agent: Perplexity-User
Allow: /

# Apple
# Applebot powers Siri, Spotlight and Safari suggestions.
User-agent: Applebot
Allow: /

# Applebot-Extended is a control token, not a separate crawler: it governs
# whether Applebot-crawled content may be used to train Apple's models.
User-agent: Applebot-Extended
Allow: /

# Meta
# NOTE(review): FacebookBot is documented by Meta as a crawler for improving
# its language/speech models; link previews are fetched by a different agent
# (facebookexternalhit), which is not listed in this file — confirm which
# one was meant.
User-agent: FacebookBot
Allow: /

# Microsoft — Bing search and Copilot
User-agent: Bingbot
Allow: /

# ===========================================================================
# Tier 2 — Training crawlers (allowed, throttled)
# These crawlers primarily collect data for model training. We allow access
# but rate-limit to reduce server load since they don't directly surface
# our product to end users.
# ===========================================================================

# A named group replaces the "User-agent: *" group for that crawler, so the
# default exclusions are repeated in every group below. Without them,
# "Allow: /" alone would open /get-anydo/, /get-cal/ and /dynamic/ to these
# crawlers. The longer Disallow paths take precedence over "Allow: /".
#
# Crawl-delay (seconds between requests) is not part of RFC 9309; each
# crawler decides whether to honor it.

# Common Crawl — feeds many LLM training sets
User-agent: CCBot
Allow: /
Disallow: /get-anydo/
Disallow: /get-cal/
Disallow: /dynamic/
Crawl-delay: 10

# Cohere — LLM training
User-agent: cohere-ai
Allow: /
Disallow: /get-anydo/
Disallow: /get-cal/
Disallow: /dynamic/
Crawl-delay: 10

# Amazon — Alexa and model training
# NOTE(review): Amazon's crawler documentation reportedly states that
# Amazonbot ignores Crawl-delay — verify, and rate-limit at the server or CDN
# if throttling this crawler matters.
User-agent: Amazonbot
Allow: /
Disallow: /get-anydo/
Disallow: /get-cal/
Disallow: /dynamic/
Crawl-delay: 10

# ===========================================================================
# Tier 3 — Blocked scrapers
# Aggressive crawlers with no clear benefit to Any.do users. These bots
# scrape at high volume primarily for competitive intelligence or
# undisclosed model training.
# ===========================================================================

# One shared group: every agent named below is denied the whole site.
#   Bytespider — ByteDance; aggressive scraping, no user-facing benefit
#   AI2Bot, Diffbot, Omgilibot, img2dataset — AI training scrapers with no
#   attribution or user benefit
# Keep the User-agent lines contiguous (no blank lines between them) so that
# line-oriented parsers treat them as a single group.
User-agent: Bytespider
User-agent: AI2Bot
User-agent: Diffbot
User-agent: Omgilibot
User-agent: img2dataset
Disallow: /