# ============================================================
# robots.txt for SMTnet.com
# https://smtnet.com
# Electronics manufacturing industry portal — SMT, PCB, EMS
# Last updated: 2026-04-17
# ============================================================
#
# MAINTENANCE NOTE — groups do not inherit from "*".
# A crawler obeys only the group whose User-agent token matches it and
# falls back to "User-agent: *" only when no named group matches
# (RFC 9309, section 2.2.1). A named group that contains nothing but
# Crawl-delay therefore leaves every path open to that crawler.
# For that reason each throttled group below repeats the shared
# Disallow list. When you change the list, change EVERY copy.
# Bots that share a crawl delay are merged into a single group with
# several User-agent lines to keep the number of copies small.
#
# Crawl-delay is a non-standard directive: it is honored by some
# crawlers and ignored by others.

# ========================================
# Default rules for all crawlers without a named group
# ========================================

User-agent: *
Crawl-delay: 2

# --- Protected directories (no public content) ---
Disallow: /accounts/
Disallow: /administration/
Disallow: /authorize/
Disallow: /fusionreactor/
Disallow: /cfformprotect/
Disallow: /tele/
Disallow: /circuit/

# --- Legacy/unused directories ---
Disallow: /directory/
Disallow: /bookstore/
Disallow: /ecommerce/
Disallow: /sales/

# --- Internal search results (infinite URL permutations) ---
Disallow: /*search_submit*
Disallow: /*searchstring*

# --- CRUD action URLs (forms, submissions, deletions) ---
Disallow: /*fuseaction=add_*
Disallow: /*fuseaction=edit_*
Disallow: /*fuseaction=delete_*
Disallow: /*fuseaction=login*
Disallow: /*fuseaction=logout*
Disallow: /*fuseaction=register*
Disallow: /*fuseaction=password*
Disallow: /*fuseaction=verify_email*
Disallow: /*fuseaction=create_*
Disallow: /*fuseaction=submit_*
Disallow: /*fuseaction=approve_*
Disallow: /*fuseaction=reject_*
Disallow: /*fuseaction=choose_type_submit*
Disallow: /*fuseaction=Thread_Delete*

# --- Session token URLs (prevent duplicate crawling) ---
Disallow: /*CFID=*
Disallow: /*CFTOKEN=*
Disallow: /*cfid=*
Disallow: /*cftoken=*

# --- Sitemap generators & build artifacts ---
Disallow: /sitemaps/generators/
Disallow: /sitemaps/cache/
Disallow: /build/
Disallow: /minify/
Disallow: /minify-g/

# ========================================
# Crawl-rate tuning for specific engines
# (same Disallow list as "*", different Crawl-delay)
# ========================================

# --- Crawl-delay 1 ---
User-agent: Bingbot
Crawl-delay: 1
Disallow: /accounts/
Disallow: /administration/
Disallow: /authorize/
Disallow: /fusionreactor/
Disallow: /cfformprotect/
Disallow: /tele/
Disallow: /circuit/
Disallow: /directory/
Disallow: /bookstore/
Disallow: /ecommerce/
Disallow: /sales/
Disallow: /*search_submit*
Disallow: /*searchstring*
Disallow: /*fuseaction=add_*
Disallow: /*fuseaction=edit_*
Disallow: /*fuseaction=delete_*
Disallow: /*fuseaction=login*
Disallow: /*fuseaction=logout*
Disallow: /*fuseaction=register*
Disallow: /*fuseaction=password*
Disallow: /*fuseaction=verify_email*
Disallow: /*fuseaction=create_*
Disallow: /*fuseaction=submit_*
Disallow: /*fuseaction=approve_*
Disallow: /*fuseaction=reject_*
Disallow: /*fuseaction=choose_type_submit*
Disallow: /*fuseaction=Thread_Delete*
Disallow: /*CFID=*
Disallow: /*CFTOKEN=*
Disallow: /*cfid=*
Disallow: /*cftoken=*
Disallow: /sitemaps/generators/
Disallow: /sitemaps/cache/
Disallow: /build/
Disallow: /minify/
Disallow: /minify-g/

# --- Crawl-delay 5 ---
User-agent: Baiduspider
User-agent: YandexBot
User-agent: FacebookBot
Crawl-delay: 5
Disallow: /accounts/
Disallow: /administration/
Disallow: /authorize/
Disallow: /fusionreactor/
Disallow: /cfformprotect/
Disallow: /tele/
Disallow: /circuit/
Disallow: /directory/
Disallow: /bookstore/
Disallow: /ecommerce/
Disallow: /sales/
Disallow: /*search_submit*
Disallow: /*searchstring*
Disallow: /*fuseaction=add_*
Disallow: /*fuseaction=edit_*
Disallow: /*fuseaction=delete_*
Disallow: /*fuseaction=login*
Disallow: /*fuseaction=logout*
Disallow: /*fuseaction=register*
Disallow: /*fuseaction=password*
Disallow: /*fuseaction=verify_email*
Disallow: /*fuseaction=create_*
Disallow: /*fuseaction=submit_*
Disallow: /*fuseaction=approve_*
Disallow: /*fuseaction=reject_*
Disallow: /*fuseaction=choose_type_submit*
Disallow: /*fuseaction=Thread_Delete*
Disallow: /*CFID=*
Disallow: /*CFTOKEN=*
Disallow: /*cfid=*
Disallow: /*cftoken=*
Disallow: /sitemaps/generators/
Disallow: /sitemaps/cache/
Disallow: /build/
Disallow: /minify/
Disallow: /minify-g/

# ========================================
# SEO / Backlink tools and other heavy crawlers — throttle heavily
# (same Disallow list as "*", Crawl-delay 10)
# ========================================

User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: PetalBot
User-agent: Amazonbot
Crawl-delay: 10
Disallow: /accounts/
Disallow: /administration/
Disallow: /authorize/
Disallow: /fusionreactor/
Disallow: /cfformprotect/
Disallow: /tele/
Disallow: /circuit/
Disallow: /directory/
Disallow: /bookstore/
Disallow: /ecommerce/
Disallow: /sales/
Disallow: /*search_submit*
Disallow: /*searchstring*
Disallow: /*fuseaction=add_*
Disallow: /*fuseaction=edit_*
Disallow: /*fuseaction=delete_*
Disallow: /*fuseaction=login*
Disallow: /*fuseaction=logout*
Disallow: /*fuseaction=register*
Disallow: /*fuseaction=password*
Disallow: /*fuseaction=verify_email*
Disallow: /*fuseaction=create_*
Disallow: /*fuseaction=submit_*
Disallow: /*fuseaction=approve_*
Disallow: /*fuseaction=reject_*
Disallow: /*fuseaction=choose_type_submit*
Disallow: /*fuseaction=Thread_Delete*
Disallow: /*CFID=*
Disallow: /*CFTOKEN=*
Disallow: /*cfid=*
Disallow: /*cftoken=*
Disallow: /sitemaps/generators/
Disallow: /sitemaps/cache/
Disallow: /build/
Disallow: /minify/
Disallow: /minify-g/

# ========================================
# Aggressive / low-value bots — block entirely
# ========================================

User-agent: DotBot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: Goldfire Server
Disallow: /

User-agent: Magus Bot
Disallow: /

User-agent: Bytespider
Disallow: /

# ========================================
# AI training & data-scraping bots — block content,
# but allow access to /llms.txt and /llms-full.txt so they
# can read structured site guidance.
# (The longest matching path wins, so the more-specific Allow
# paths beat the Disallow: / per RFC 9309.)
# ========================================

User-agent: GPTBot
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /

User-agent: ChatGPT-User
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /

User-agent: CCBot
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /

User-agent: anthropic-ai
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /

User-agent: ClaudeBot
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /

User-agent: Claude-Web
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /

User-agent: cohere-ai
Allow: /llms.txt
Allow: /llms-full.txt
Disallow: /

# --- The agents below are blocked entirely: they get NO /llms.txt exception ---

User-agent: Google-Extended
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: Timpibot
Disallow: /

# ========================================
# AI search bots — allow so SMTnet appears in AI search results.
# The shared Disallow list is repeated here as well; each Disallow
# path is longer than "Allow: /" and therefore takes precedence,
# keeping protected and duplicate URLs out of the crawl.
# ========================================

User-agent: PerplexityBot
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /
Crawl-delay: 5
Disallow: /accounts/
Disallow: /administration/
Disallow: /authorize/
Disallow: /fusionreactor/
Disallow: /cfformprotect/
Disallow: /tele/
Disallow: /circuit/
Disallow: /directory/
Disallow: /bookstore/
Disallow: /ecommerce/
Disallow: /sales/
Disallow: /*search_submit*
Disallow: /*searchstring*
Disallow: /*fuseaction=add_*
Disallow: /*fuseaction=edit_*
Disallow: /*fuseaction=delete_*
Disallow: /*fuseaction=login*
Disallow: /*fuseaction=logout*
Disallow: /*fuseaction=register*
Disallow: /*fuseaction=password*
Disallow: /*fuseaction=verify_email*
Disallow: /*fuseaction=create_*
Disallow: /*fuseaction=submit_*
Disallow: /*fuseaction=approve_*
Disallow: /*fuseaction=reject_*
Disallow: /*fuseaction=choose_type_submit*
Disallow: /*fuseaction=Thread_Delete*
Disallow: /*CFID=*
Disallow: /*CFTOKEN=*
Disallow: /*cfid=*
Disallow: /*cftoken=*
Disallow: /sitemaps/generators/
Disallow: /sitemaps/cache/
Disallow: /build/
Disallow: /minify/
Disallow: /minify-g/

# ========================================
# Anthropic search (future) — uncomment when Anthropic launches
# a dedicated search-only bot distinct from ClaudeBot.
# If enabled, also copy the shared Disallow list into the group,
# because a named group does not inherit the rules of "*".
# ========================================
# User-agent: Claude-SearchBot
# Allow: /
# Crawl-delay: 5

# ----------------------------------------
# Sitemap
# (The LLM guidance files are served at /llms.txt and /llms-full.txt;
# robots.txt has no directive for advertising them, so only the
# sitemap is declared here.)
# ----------------------------------------
Sitemap: https://smtnet.com/sitemap.xml