# robots.txt for FindMine Marketing Website
# Last updated: March 2025
#
# Format notes:
#   - One directive per line; "#" starts a comment that runs to end of line.
#   - Inside a group, sections are separated with bare "#" lines rather than
#     blank lines, because some legacy parsers treat a blank line as the end
#     of the group and would ignore the rules that follow it.

# ---------------------------------------------------------------------------
# Default policy: allow all well-behaved bots
# ---------------------------------------------------------------------------
User-agent: *
#
# Explicitly allow access to llms.txt
Allow: /llms.txt
#
# Allow crawling of most content.
# The path-specific Allows below are redundant with "Allow: /" and are kept
# only for reference.
Allow: /
# Allow: /blog/
# Allow: /case-studies/
# Allow: /about/
# Allow: /demo/          # Obsolete path
# Allow: /contact-us/    # Path is likely /contact
#
# Prevent crawling of development/staging areas
Disallow: /dev/
Disallow: /staging/
Disallow: /test/
Disallow: /.env
Disallow: /.git/
Disallow: /dist/
Disallow: /src/
Disallow: /.bolt/
#
# Prevent crawling of admin and internal areas
Disallow: /admin/
Disallow: /internal/
Disallow: /dashboard/
#
# Prevent crawling of coverage reports
Disallow: /coverage/
#
# Prevent crawling of search results and filtered pages
Disallow: /search
Disallow: /*?query=
Disallow: /*?filter=
Disallow: /*?sort=
#
# Prevent crawling of temporary or draft content
Disallow: /drafts/
Disallow: /tmp/
Disallow: /temp/
#
# Prevent crawling of duplicate content (likely obsolete)
# Disallow: /print/
# Disallow: /pdf/
# Disallow: /amp/
#
# Crawl-delay for rate limiting (removed - generally not recommended unless
# specific issues arise)
# Crawl-delay: 10

# ---------------------------------------------------------------------------
# Sitemap location (applies to all crawlers; not part of any group)
# ---------------------------------------------------------------------------
Sitemap: https://www.findmine.com/sitemap.xml

# ---------------------------------------------------------------------------
# Special rules for specific bots
# Block AI training crawlers but allow access to llms.txt.
# A crawler that matches one of these groups uses that group only and does
# not inherit the "User-agent: *" rules above.
# ---------------------------------------------------------------------------
User-agent: GPTBot
Allow: /llms.txt
Disallow: /

User-agent: ChatGPT-User
Allow: /llms.txt
Disallow: /

User-agent: Google-Extended
Allow: /llms.txt
Disallow: /

User-agent: CCBot
Allow: /llms.txt
Disallow: /

User-agent: anthropic-ai
Allow: /llms.txt
Disallow: /

User-agent: Claude-Web
Allow: /llms.txt
Disallow: /

User-agent: Omgilibot
Allow: /llms.txt
Disallow: /

User-agent: Omgili
Allow: /llms.txt
Disallow: /