# robots.txt for wilcotecapetown.co.za
# Last updated: 2025-09-04
#
# This file is optimized to grant maximum visibility to key AI and search engine
# crawlers while protecting sensitive directories and blocking data harvesters.
#
# HOW TO READ / MAINTAIN THIS FILE
#   * One directive per line. A "#" starts a comment that runs to end of line,
#     so directives must never share a line with a leading comment.
#   * A crawler obeys ONLY the most specific group that names it; it does NOT
#     also inherit the rules of "User-agent: *". Every group that allows
#     crawling therefore repeats the full WordPress rule set below. When you
#     change that rule set, change it in ALL THREE allow-groups.
#   * Specific rules are listed before the catch-all "Allow: /" so that both
#     longest-match parsers (RFC 9309) and older first-match parsers reach the
#     same result.
#   * Do not put blank lines inside a group; some parsers treat a blank line
#     as the end of the group.
#   * robots.txt is advisory and publicly readable. It is NOT access control.
#     The sensitive paths listed here (wp-config.php, debug.log, backups) must
#     also be denied at the web-server level.

Sitemap: https://wilcotecapetown.co.za/sitemap_index.xml

# --- PRIORITY CRAWLERS (no crawl delay) ---
# Explicitly named crawlers that may crawl all public content.
#
# Google Ecosystem
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Googlebot-Mobile
User-agent: Googlebot-Video
# Google AI (Gemini/Bard)
User-agent: BardBot
User-agent: Google-InspectionTool
# Other AI Companies
User-agent: CoHereBot
User-agent: AI21Labs
User-agent: MistralBot
User-agent: MetaAI
# International Search Engines
User-agent: YandexBot
User-agent: DuckDuckBot
User-agent: BaiduSpider
User-agent: SogouSpider
# Alternative Search/AI Platforms
User-agent: YouBot
User-agent: KagiBot
User-agent: NeevaAI
User-agent: PhindBot
User-agent: DuckAssistBot
User-agent: SearchGPT
# Social Media & Aggregators
User-agent: TwitterBot
User-agent: LinkedInBot
User-agent: facebookexternalhit
User-agent: WhatsApp
User-agent: TelegramBot
User-agent: SkypeBot
User-agent: SlackBot
User-agent: DiscordBot
User-agent: AppleNewsBot
User-agent: FlipboardProxy
User-agent: PocketParser
# SEO & Analytics Tools
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
# WordPress rule set (keep identical in all allow-groups)
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /wp-config.php
Disallow: /wp-content/uploads/backups/
Disallow: /wp-content/debug.log
Disallow: /xmlrpc.php
Disallow: /readme.html
Disallow: /license.txt
Allow: /

# --- PRIORITY CRAWLERS (polite crawl delay) ---
# Same access as above, plus a 1-second Crawl-delay.
# Crawl-delay is a non-standard directive: crawlers that do not support it
# (Google, for one) simply ignore the line.
#
# OpenAI (ChatGPT)
User-agent: ChatGPT-User
User-agent: GPTBot
User-agent: oai-bot
# Google AI (Gemini/Bard)
User-agent: GeminiBot
# Microsoft Search
User-agent: BingBot
User-agent: MSNBot
# Anthropic (Claude)
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: AnthropicAI
# Perplexity AI
User-agent: PerplexityBot
# xAI (Grok)
User-agent: xAI-User
User-agent: xAI-Crawler
# WordPress rule set (keep identical in all allow-groups)
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /wp-config.php
Disallow: /wp-content/uploads/backups/
Disallow: /wp-content/debug.log
Disallow: /xmlrpc.php
Disallow: /readme.html
Disallow: /license.txt
Allow: /
Crawl-delay: 1

# --- BLOCKED CRAWLERS ---
# Block known training data harvesters and problematic crawlers to control data use.
# Note: Common Crawl's user-agent token is "CCBot". A token containing a space
# ("Common Crawl") is not a valid product token, so it is not listed separately.
User-agent: Google-Extended
User-agent: CCBot
User-agent: Amazonbot
User-agent: Applebot-Extended
User-agent: Meta-ExternalAgent
User-agent: FacebookBot
User-agent: ByteSpider
User-agent: ByteDance
User-agent: scrapy
User-agent: python-requests
User-agent: wget
User-agent: curl
Disallow: /

# --- DEFAULT CRAWLER RULES ---
# Any crawler not named above: allow the site, minus sensitive WordPress files
# and non-essential directories.
User-agent: *
# WordPress rule set (keep identical in all allow-groups)
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-register.php
Disallow: /wp-config.php
Disallow: /wp-content/uploads/backups/
Disallow: /wp-content/debug.log
Disallow: /xmlrpc.php
Disallow: /readme.html
Disallow: /license.txt
Allow: /