# ============================================================
# robots.txt — Diário Carioca v8.4 (AI SHIELDING & E-E-A-T)
# Goal: allow answer/retrieval crawlers | block training crawlers
#
# How groups are matched (RFC 9309): a crawler obeys ONLY the most
# specific group that names its user-agent. Named groups do NOT
# inherit the rules of "User-agent: *", so the shared path rules are
# repeated in every "allowed" group below. Keep them in sync.
# ============================================================

# ─── DEFAULT: every crawler not named in a group below ───────
User-agent: *
# Structural directories
Disallow: /wp-admin/
# admin-ajax.php is requested by front-end features, so it stays reachable
Allow: /wp-admin/admin-ajax.php
Disallow: /cgi-bin/
Disallow: /checkout/
Disallow: /minha-conta/
# Parameter clean-up (avoids spending crawl budget on duplicate URLs).
# "?" matches the parameter when it is first in the query string,
# "&" matches it in any later position.
Disallow: /*?replytocom=
Disallow: /*&replytocom=
Disallow: /*?utm_
Disallow: /*&utm_
Disallow: /*?dc_
Disallow: /*&dc_
Disallow: /*?preview=
Disallow: /*&preview=
Disallow: /trackback/
Disallow: /embed/
# Internal search results remain crawlable
Allow: /?s=
Allow: /search/

# ─── GROUP 1: AI FOR ANSWERS AND CITATIONS (ALLOWED) ─────────
# Agents that fetch pages at answer time and cite the source.
# NOTE(review): Claude-Web and anthropic-ai are legacy tokens whose
# current purpose is not documented by the vendor — confirm whether
# they still belong in this group.
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Claude-User
User-agent: Claude-SearchBot
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: YouBot
# Same path rules as the default group (named groups do not inherit them)
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /cgi-bin/
Disallow: /checkout/
Disallow: /minha-conta/
Disallow: /*?replytocom=
Disallow: /*&replytocom=
Disallow: /*?utm_
Disallow: /*&utm_
Disallow: /*?dc_
Disallow: /*&dc_
Disallow: /*?preview=
Disallow: /*&preview=
Disallow: /trackback/
Disallow: /embed/
Allow: /

# ─── GROUP 2: AI FOR TRAINING (BLOCKED) ──────────────────────
# Agents/tokens that collect content to train foundation models
# without sending traffic back.
# Google-Extended and Applebot-Extended are opt-out tokens rather than
# separate crawlers: blocking them does not affect Googlebot/Applebot
# search crawling. ClaudeBot is Anthropic's training-data crawler.
User-agent: Google-Extended
User-agent: Applebot-Extended
User-agent: GPTBot
User-agent: ClaudeBot
User-agent: CCBot
User-agent: cohere-ai
User-agent: AI2Bot
User-agent: Bytespider
User-agent: Diffbot
User-agent: ImagesiftBot
User-agent: Meta-ExternalAgent
User-agent: Amazonbot
Disallow: /

# ─── GROUP 3: TRADITIONAL SEARCH ENGINES (ALLOWED) ───────────
User-agent: Googlebot
User-agent: Googlebot-News
User-agent: Google-InspectionTool
User-agent: bingbot
User-agent: Applebot
# Same path rules as the default group (named groups do not inherit them)
Disallow: /wp-admin/
Allow: /wp-admin/admin-ajax.php
Disallow: /cgi-bin/
Disallow: /checkout/
Disallow: /minha-conta/
Disallow: /*?replytocom=
Disallow: /*&replytocom=
Disallow: /*?utm_
Disallow: /*&utm_
Disallow: /*?dc_
Disallow: /*&dc_
Disallow: /*?preview=
Disallow: /*&preview=
Disallow: /trackback/
Disallow: /embed/
Allow: /

# ─── GLOBAL & MULTILINGUAL SITEMAPS ──────────────────────────
# Sitemap lines are independent of user-agent groups; each must be a
# single "Sitemap: <absolute URL>" line.
Sitemap: https://diariocarioca.com/sitemap_index.xml
Sitemap: https://diariocarioca.com/sitemap-news.xml
Sitemap: https://diariocarioca.com/news-live.xml
Sitemap: https://diariocarioca.com/image-sitemap.xml
Sitemap: https://diariocarioca.com/sitemap-categorias.xml
Sitemap: https://diariocarioca.com/sitemap-pt.xml
Sitemap: https://diariocarioca.com/sitemap-en.xml
Sitemap: https://diariocarioca.com/sitemap-es.xml
Sitemap: https://diariocarioca.com/sitemap-fr.xml
Sitemap: https://diariocarioca.com/sitemap-it.xml
Sitemap: https://diariocarioca.com/sitemap-de.xml
Sitemap: https://diariocarioca.com/sitemap-ru.xml
Sitemap: https://diariocarioca.com/sitemap-zh.xml
Sitemap: https://diariocarioca.com/sitemap-ar.xml