# ===============================
# robots.txt - ImperiaPost.it
# Last updated: 07/11/2025
# Optimized for Google SEO 2025
#
# Formatting rules for this file:
#  - Every directive must be on its own line. A "#" starts a comment that
#    runs to the end of the line, so directives written after a "#" on the
#    same line are never parsed.
#  - Do not put blank lines inside a group: some legacy parsers treat a
#    blank line as the end of the group. Use "#" lines as separators.
# ===============================

# --- Rules for all crawlers ---
User-agent: *
# WordPress - block system directories
Disallow: /cgi-bin/
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/themes/
# Allow resources that are critical for rendering (Core Web Vitals).
# These win over the Disallow rules above because the longest matching
# path takes precedence.
Allow: /wp-admin/admin-ajax.php
Allow: /wp-content/uploads/
Allow: /wp-content/themes/*.css
Allow: /wp-content/themes/*.js
Allow: /wp-content/plugins/*.css
Allow: /wp-content/plugins/*.js
# Search pages and duplicate content
Disallow: /search/
Disallow: /?s=
Disallow: /page/
Disallow: */feed/
Disallow: */trackback/
Disallow: */attachment/
Disallow: /author/*/page/
# URL parameters to avoid (tracking and duplicates)
Disallow: /*?replytocom
Disallow: /*?utm_*
Disallow: /*?fbclid=
Disallow: /*&
# Sensitive WordPress files and pages
Disallow: /wp-*.php
Disallow: /xmlrpc.php
Disallow: /readme.html
Disallow: /license.txt

# --- Google bots (highest priority) ---
# NOTE(review): a crawler obeys only the most specific group that matches
# it and does not merge in the "*" group. With "Allow: /" here, these
# Google crawlers are NOT subject to any of the Disallow rules above
# (search pages, tracking parameters, feeds, ...). Confirm this is intended.
User-agent: Googlebot
User-agent: Googlebot-News
User-agent: Mediapartners-Google
User-agent: AdsBot-Google
Allow: /

User-agent: Googlebot-Image
Allow: /wp-content/uploads/

# --- Facebook - all user agents (required 2026) ---
# User-agent matching is case-insensitive, so "facebookexternalhit" also
# covers the "FacebookExternalHit" spelling; it is listed only once.
User-agent: facebookexternalhit
User-agent: facebot
User-agent: Facebook App
Allow: /

# --- Bing ---
# NOTE(review): as with the Google group, this dedicated group means
# Bingbot does not inherit the "*" Disallow rules.
User-agent: Bingbot
Allow: /
Crawl-delay: 5

# --- Web archives (important for newspapers) ---
User-agent: ia_archiver
Allow: /

# --- Aggressive bots and AI scrapers to block ---
User-agent: AhrefsBot
User-agent: BLEXBot
User-agent: SEMrushBot
User-agent: SemrushBot-SA
User-agent: MegaIndex.ru
User-agent: megaindex.com
User-agent: SEOkicks-Robot
User-agent: SISTRIX
Disallow: /

# --- Blocked SEO crawlers, scrapers and downloaders ---
# One directive per line: several directives on a single line are parsed
# as one malformed "User-agent" value and block nothing.
# No blank lines inside a group: some legacy parsers end the group there.
User-agent: rogerbot
User-agent: DotBot
User-agent: MJ12bot
User-agent: Wget
User-agent: TurnitinBot
User-agent: YoudaoBot
User-agent: Yeti
User-agent: pimonster
User-agent: pricepi
User-agent: Cuam
User-agent: Pixray-Seeker
User-agent: UptimeRobot
User-agent: SimplePie
User-agent: Heritrix
User-agent: Vagabondo
User-agent: PetalBot
User-agent: Bytespider
User-agent: DataForSeoBot
User-agent: omgili
User-agent: Diffbot
Disallow: /

# --- Blocked AI crawlers ---
# NOTE(review): ClaudeBot is the crawler token Anthropic currently
# documents; anthropic-ai and Claude-Web are kept as legacy tokens so the
# "Claude blocked" intent stated in the notes below actually holds.
User-agent: CCBot
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Google-Extended
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Applebot-Extended
Disallow: /

# --- XML sitemap ---
Sitemap: https://imperiapost.it/sitemap_index.xml

# ===============================
# Technical notes, November 2025:
# - CSS/JS allowed for Core Web Vitals
# - URL parameters blocked to prevent duplicates
# - AI bots blocked (GPTBot, CCBot, Claude, Applebot-Extended)
# - Archive.org allowed for article history
# - Crawl-delay only for Bing (Google ignores it)
# - NO duplicate SEO bot entries (conflicting Allow rules removed)
# ===============================