# robots.txt
#
# Format notes:
#   * One directive per line. "#" starts a comment that runs to end of line.
#   * A crawler obeys ONLY the most specific group that names it; it does not
#     additionally inherit rules from the "User-agent: *" group.
#   * This file is advisory. Vulnerability scanners, harvesters and scrapers
#     commonly ignore it, so the block lists below must be backed by
#     server-side or WAF rules to have any real effect.

# ===================================
# LEGITIMATE SEARCH ENGINES & AI ASSISTANTS (ALLOWED)
# ===================================
# These crawlers may fetch the whole site except the protected paths.
# The protected paths are repeated here because a named group replaces the
# "*" group for the crawlers it lists. The Disallow lines come before
# "Allow: /" so parsers that use first-match ordering still honour them.
# No Crawl-delay is set here: the delay is meant for remaining bots only.
# Google
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Google-Extended
# Bing
User-agent: Bingbot
# Yahoo (powered by Bing)
User-agent: Slurp
# DuckDuckGo
User-agent: DuckDuckBot
# Yandex
User-agent: Yandex
# Baidu
User-agent: Baiduspider
# AI search / assistant crawlers
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: PerplexityBot
Disallow: /admin/
Disallow: /*.php$
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /*.env$
Disallow: /login/
Disallow: /register/
Allow: /

# ===================================
# MALICIOUS & AGGRESSIVE CRAWLERS (BLOCKED)
# ===================================
# Consecutive User-agent lines form one group sharing the rules that follow.

# AI training scrapers
User-agent: CCBot
User-agent: YisouSpider
User-agent: cohere-ai
User-agent: Omgilibot
User-agent: FacebookBot
User-agent: Diffbot
User-agent: Bytespider
User-agent: ImagesiftBot
Disallow: /

# Email harvesters
User-agent: EmailCollector
User-agent: EmailSiphon
User-agent: WebBandit
User-agent: EmailWolf
User-agent: ExtractorPro
User-agent: CherryPicker
User-agent: CrunchBot
Disallow: /

# Content scrapers
User-agent: WebCopier
User-agent: Offline Explorer
User-agent: HTTrack
User-agent: Microsoft.URL.Control
User-agent: Teleport
User-agent: TeleportPro
User-agent: WebZip
User-agent: linko
User-agent: grub
User-agent: grub-client
User-agent: Surfbot
User-agent: Mister PiX
Disallow: /

# SEO / marketing bots (can be aggressive)
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: DotBot
User-agent: MJ12bot
User-agent: SeznamBot
User-agent: linkdexbot
User-agent: BLEXBot
User-agent: DataForSeoBot
Disallow: /

# Vulnerability scanners
User-agent: Nuclei
User-agent: Nikto
User-agent: sqlmap
User-agent: masscan
User-agent: nmap
User-agent: OpenVAS
Disallow: /

# Spam bots and generic HTTP clients
User-agent: SurveyBot
User-agent: SpankBot
User-agent: BotALot
User-agent: lwp-trivial
User-agent: wget
User-agent: curl
User-agent: Java
User-agent: Python-urllib
Disallow: /

# Miscellaneous bad bots
User-agent: BackDoorBot
User-agent: TurnitinBot
User-agent: Zealbot
User-agent: MSIECrawler
User-agent: SiteSnagger
User-agent: WebStripper
User-agent: Fetch
User-agent: ia_archiver
User-agent: Alexibot
User-agent: Asterias
User-agent: PetalBot
User-agent: MegaIndex
User-agent: proximic
User-agent: SEOkicks-Robot
Disallow: /

# ===================================
# PROTECTED DIRECTORIES (ALL OTHER BOTS)
# ===================================
# Applies to every crawler not named in a group above.
# "*" matches any character sequence and "$" anchors the end of the URL;
# both are extensions that not every crawler implements.
User-agent: *
Disallow: /admin/
Disallow: /*.php$
Disallow: /*.sql$
Disallow: /*.log$
Disallow: /*.env$
Disallow: /login/
Disallow: /register/
# Crawl delay for remaining bots (in seconds). Non-standard directive;
# some crawlers ignore it.
Crawl-delay: 10

# ===================================
# SITEMAP LOCATION
# ===================================
Sitemap: https://kraussmaffei.com/sitemap.xml