# robots.txt for https://gmelius.com

# ------------------------------------------------------------------
# 1. GLOBAL RULES – WHAT EVERY CRAWLER MAY / MAY NOT FETCH
# ------------------------------------------------------------------
User-agent: *
# — Everything public is crawlable by default.
#   NOTE: a crawler that matches a more specific User-agent group further
#   down obeys that group INSTEAD of this one; groups are not merged.
Allow: /

# --- BLOCK internal search, previews, and other thin pages -------
Disallow: /search           # Webflow on-site search results
Disallow: /*?query=         # Alt query parameter for search
Disallow: /*?s=             # Legacy query parameter copies
Disallow: /preview/         # Any staging or preview folders
Disallow: /*?preview=*      # Preview query strings

# --- BLOCK duplicate tracking-parameter URLs ---------------------
#   Every parameter is listed twice: the "?" form matches it as the first
#   query parameter, the "&" form matches it when appended after others.
Disallow: /*?utm_*
Disallow: /*&utm_*
Disallow: /*?ref=*
Disallow: /*&ref=*
Disallow: /*?fbclid=*
Disallow: /*&fbclid=*
Disallow: /*?gclid=*
Disallow: /*&gclid=*
Disallow: /*?mc_cid=*
Disallow: /*&mc_cid=*
Disallow: /*?mc_eid=*
Disallow: /*&mc_eid=*

# --- OPTIONAL: collapse tracking params for Yandex ----------------
#   Clean-param is a Yandex-only extension; Google and Bing ignore it.
Clean-param: utm_source&utm_medium&utm_campaign&utm_term&utm_content&fbclid&gclid&ref


# ------------------------------------------------------------------
# 2. GOOGLE SPECIALTY BOTS – same blocks as section 1, restated
#    A crawler obeys only the most specific group that names it and does
#    NOT fall back to the * group, so an empty "Disallow:" here would open
#    up /search, /preview/ and every tracking-parameter URL to these bots.
#    Keep this list in sync with the Disallow rules in section 1.
# ------------------------------------------------------------------
User-agent: Googlebot-Image
User-agent: Googlebot-News
Disallow: /search
Disallow: /*?query=
Disallow: /*?s=
Disallow: /preview/
Disallow: /*?preview=*
Disallow: /*?utm_*
Disallow: /*&utm_*
Disallow: /*?ref=*
Disallow: /*&ref=*
Disallow: /*?fbclid=*
Disallow: /*&fbclid=*
Disallow: /*?gclid=*
Disallow: /*&gclid=*
Disallow: /*?mc_cid=*
Disallow: /*&mc_cid=*
Disallow: /*?mc_eid=*
Disallow: /*&mc_eid=*

# ------------------------------------------------------------------
# 3. LARGE THIRD-PARTY SEO CRAWLERS – slow them down politely
#    A bot that has its own group ignores the * group entirely, so the
#    section 1 blocks are restated here; with Crawl-delay alone these bots
#    would be free to crawl /search, /preview/ and tracking-parameter URLs.
#    Keep this list in sync with the Disallow rules in section 1.
# ------------------------------------------------------------------
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
Crawl-delay: 10
Disallow: /search
Disallow: /*?query=
Disallow: /*?s=
Disallow: /preview/
Disallow: /*?preview=*
Disallow: /*?utm_*
Disallow: /*&utm_*
Disallow: /*?ref=*
Disallow: /*&ref=*
Disallow: /*?fbclid=*
Disallow: /*&fbclid=*
Disallow: /*?gclid=*
Disallow: /*&gclid=*
Disallow: /*?mc_cid=*
Disallow: /*&mc_cid=*
Disallow: /*?mc_eid=*
Disallow: /*&mc_eid=*

# ------------------------------------------------------------------
# 4. YANDEX – optional 'Host' directive (ignored by others)
#    NOTE(review): Yandex announced in 2018 that it no longer honours Host
#    (a 301 redirect to the preferred domain is used instead) — confirm and
#    consider removing this line.
# ------------------------------------------------------------------
Host: gmelius.com


# Absolute URL of the XML sitemap; this line is not tied to any User-agent group.
Sitemap: https://gmelius.com/sitemap.xml