# robots.txt — controlio.net
# Updated: 2026-04-03
# Changes: Allowed LLM/AI crawlers and fixed Mediapartners-Google formatting.
#
# Format notes:
#   * One directive per line. A "#" comments out the rest of its line, so
#     directives must never share a line with a preceding comment.
#   * A crawler obeys only the most specific User-agent group that matches it;
#     rules in the "User-agent: *" group are NOT inherited by named groups.
#     That is why the shared Disallow rules are repeated in the Googlebot,
#     Bingbot and default groups below.

# ─── GOOGLE ────────────────────────────────────────────────────────────────────
User-agent: Googlebot
Allow: /
Allow: /css/
Allow: /js/
Disallow: /api_docs/components/
Disallow: /api_docs/resources/
Disallow: /index_*
Disallow: /bug-bounty.html

# Google AdSense / Media Partners — must be its own block with explicit Allow
User-agent: Mediapartners-Google
Allow: /

# Google AI (Gemini training) — previously blocked, now allowed
User-agent: Google-Extended
Allow: /

# ─── OPENAI / CHATGPT ──────────────────────────────────────────────────────────
# ChatGPT training crawler — previously blocked, now allowed
User-agent: GPTBot
Allow: /

# ChatGPT real-time web search crawler
User-agent: OAI-SearchBot
Allow: /

# ─── OTHER AI / LLM CRAWLERS ───────────────────────────────────────────────────
# Anthropic (Claude)
User-agent: ClaudeBot
Allow: /

User-agent: anthropic-ai
Allow: /

# Perplexity AI
User-agent: PerplexityBot
Allow: /

# Apple AI (Siri, Apple Intelligence)
User-agent: Applebot-Extended
Allow: /

# Common Crawl — feeds many open LLMs; allow public content only
# NOTE(review): this group has no "Disallow: /", so the Allow lines below do
# not restrict anything — as written, CCBot may fetch every path except
# /api_docs/ and /app/. If a true allow-list is intended, add "Disallow: /"
# (plus "Allow: /$" if the home page should remain crawlable). Confirm the
# intended policy with the site owner before changing it.
User-agent: CCBot
Allow: /blog/
Allow: /employee-monitoring.html
Allow: /prices.html
Allow: /security.html
Allow: /gdpr.html
Allow: /about.html
Disallow: /api_docs/
Disallow: /app/

# Amazon Alexa / AI
User-agent: Amazonbot
Allow: /

# ─── BING ──────────────────────────────────────────────────────────────────────
User-agent: Bingbot
Allow: /
Disallow: /api_docs/components/
Disallow: /api_docs/resources/
Disallow: /index_*
Disallow: /bug-bounty.html

# ─── MISC / VALIDATORS ─────────────────────────────────────────────────────────
User-agent: teoma
Allow: /

User-agent: W3C-checklink
Allow: /

User-agent: WDG_SiteValidator
Disallow: /

# ─── BLOCKED CRAWLERS ──────────────────────────────────────────────────────────
# SentiBot (sentiment analysis scraper)
User-agent: sentibot
Disallow: /

# webz.io scrapers
User-agent: omgili
Disallow: /

User-agent: omgilibot
Disallow: /

# Facebook crawler (not an LLM search agent)
User-agent: FacebookBot
Disallow: /

# ─── DEFAULT (all other bots) ──────────────────────────────────────────────────
User-agent: *
Allow: /
Disallow: /api_docs/components/
Disallow: /api_docs/resources/
Disallow: /index_*
Disallow: /bug-bounty.html

# ─── SITEMAP ───────────────────────────────────────────────────────────────────
Sitemap: https://controlio.net/sitemap.xml