# robots.txt for https://gmelius.com
#
# How this file is read:
#   * One directive per line; everything after '#' on a line is a comment.
#   * A crawler obeys ONLY the most specific User-agent group that matches
#     it. Groups do not inherit from 'User-agent: *', so any bot that gets
#     its own group must have the shared rules repeated inside that group.
#   * Disallow/Allow values are prefix matches; '*' matches any run of
#     characters, so a trailing '*' is redundant and is omitted below.

# ------------------------------------------------------------------
# 1. GLOBAL RULES - WHAT EVERY CRAWLER MAY / MAY NOT FETCH
# ------------------------------------------------------------------
User-agent: *

# Everything public is crawlable by default
Allow: /

# --- BLOCK internal search, previews, and other thin pages --------
# Webflow on-site search results
Disallow: /search
# Alternative query parameter for search
Disallow: /*?query=
Disallow: /*&query=
# Legacy search query parameter
Disallow: /*?s=
Disallow: /*&s=
# Any staging or preview folders
Disallow: /preview/
# Preview query strings
Disallow: /*?preview=
Disallow: /*&preview=

# --- BLOCK duplicate tracking-parameter URLs ----------------------
# Each parameter is listed in both '?' (first parameter) and '&'
# (subsequent parameter) form so the rule matches wherever it appears.
Disallow: /*?utm_
Disallow: /*&utm_
Disallow: /*?ref=
Disallow: /*&ref=
Disallow: /*?fbclid=
Disallow: /*&fbclid=
Disallow: /*?gclid=
Disallow: /*&gclid=
Disallow: /*?mc_cid=
Disallow: /*&mc_cid=
Disallow: /*?mc_eid=
Disallow: /*&mc_eid=

# --- OPTIONAL: tracking-parameter hint for Yandex -----------------
# Clean-param is a Yandex extension; Google and Bing do not implement
# it and skip the line as an unknown directive.
Clean-param: utm_source&utm_medium&utm_campaign&utm_term&utm_content&fbclid&gclid&ref

# ------------------------------------------------------------------
# 2. GOOGLE SPECIALTY BOTS - covered by the global rules above
# ------------------------------------------------------------------
# Googlebot-Image and Googlebot-News intentionally have NO group of
# their own. A dedicated group with an empty 'Disallow:' would replace
# the global rules for those bots and open every blocked URL to them.
# With no dedicated group they fall back to 'User-agent: *'.

# ------------------------------------------------------------------
# 3. LARGE THIRD-PARTY SEO CRAWLERS - slow them down politely
# ------------------------------------------------------------------
# These bots match this group instead of 'User-agent: *', so the global
# blocks are repeated here. Keep the two lists in sync when editing.
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
Crawl-delay: 10
Allow: /
Disallow: /search
Disallow: /*?query=
Disallow: /*&query=
Disallow: /*?s=
Disallow: /*&s=
Disallow: /preview/
Disallow: /*?preview=
Disallow: /*&preview=
Disallow: /*?utm_
Disallow: /*&utm_
Disallow: /*?ref=
Disallow: /*&ref=
Disallow: /*?fbclid=
Disallow: /*&fbclid=
Disallow: /*?gclid=
Disallow: /*&gclid=
Disallow: /*?mc_cid=
Disallow: /*&mc_cid=
Disallow: /*?mc_eid=
Disallow: /*&mc_eid=

# ------------------------------------------------------------------
# 4. YANDEX - legacy 'Host' directive (ignored by other crawlers)
# ------------------------------------------------------------------
# NOTE(review): Yandex reportedly stopped honoring Host in 2018 in
# favour of 301 redirects - confirm, then consider removing this line.
Host: gmelius.com

# Sitemap is independent of User-agent groups and may appear anywhere.
Sitemap: https://gmelius.com/sitemap.xml