# -------------------------------------------------------------------------
# robots.txt
#
# Layout rules for this file:
#   * One directive per line. A "#" starts a comment that runs to the end
#     of the line, so a directive placed after a "#" on the same line is
#     ignored by crawlers.
#   * A group is a "User-agent:" line followed by its Allow/Disallow rules.
#   * Sections inside a group are separated by comment lines rather than
#     blank lines, because some older parsers treat a blank line as the end
#     of the group.
# -------------------------------------------------------------------------
#
# -------------------------------------------------------------------------
# GLOBAL SETTINGS
# -------------------------------------------------------------------------
User-agent: *
Allow: /
#
# CORE WORDPRESS
# admin-ajax.php is re-allowed because front-end features call it even
# though it lives under /wp-admin/.
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-register.php
Allow: /wp-admin/admin-ajax.php
#
# LEGACY PATHS (only block if confirmed NOT used in job URLs)
# WARNING: Do not add a /job/ block if your permalink structure uses /job/
Disallow: /company/
Disallow: /employer/
Disallow: /fr/
Disallow: /?p=
#
# DATE ARCHIVES — SAFER VERSION (no wildcards that catch job slugs)
# One explicit rule per year; a new year needs a new line here.
Disallow: /2020/
Disallow: /2021/
Disallow: /2022/
Disallow: /2023/
Disallow: /2024/
Disallow: /2025/
Disallow: /2026/
#
# PAGINATION
Disallow: /category/*/page/
Disallow: /page/
#
# SEARCH
Disallow: /?s=
Disallow: /search/
#
# URL PARAMETERS
# NOTE: each pattern contains a literal "?", so it only matches when the
# parameter is the first one in the query string. The same parameter
# appearing later (after "&") is not covered by these rules.
Disallow: /*?sort=
Disallow: /*?filter=
Disallow: /*?order=
Disallow: /*?job_type=
Disallow: /*?utm_source=
Disallow: /*?utm_medium=
Disallow: /*?utm_campaign=
Disallow: /*?ref=
Disallow: /*?gad_source=
Disallow: /*?gclid=
#
# AMP
Disallow: /*/amp/
#
# FEEDS
Disallow: /feed/
Disallow: /comments/feed/
Disallow: /*/feed/
#
# USER ACCOUNT PAGES
Disallow: /my-account/
Disallow: /cart/
Disallow: /checkout/
Disallow: /candidate-dashboard/
Disallow: /employer-dashboard/
#
# PLUGIN DATA
Disallow: /wp-content/uploads/wpforms/

# -------------------------------------------------------------------------
# BLOCKED SCRAPERS (keep these)
# -------------------------------------------------------------------------
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: Barkrowler
Disallow: /

# -------------------------------------------------------------------------
# AI CRAWLERS — ALLOW ALL (traffic opportunity, not a threat)
#
# NOTE: a crawler that matches one of these named groups follows only that
# group and ignores the "User-agent: *" group above. "Allow: /" here
# therefore really does mean everything, including the paths disallowed
# for other crawlers.
# -------------------------------------------------------------------------
User-agent: ClaudeBot
Allow: /

User-agent: GPTBot
Allow: /

User-agent: Google-Extended
Allow: /

User-agent: Applebot-Extended
Allow: /

User-agent: Bytespider
Allow: /

User-agent: Amazonbot
Allow: /

User-agent: CCBot
Allow: /

User-agent: meta-externalagent
Allow: /

# -------------------------------------------------------------------------
# GOOGLE SPECIAL CRAWLERS
# -------------------------------------------------------------------------
User-agent: Mediapartners-Google
Allow: /

# -------------------------------------------------------------------------
# SOCIAL BOTS
# -------------------------------------------------------------------------
User-agent: facebookexternalhit
Allow: /

# -------------------------------------------------------------------------
# SITEMAP
# The Sitemap line is independent of the user-agent groups above.
# -------------------------------------------------------------------------
Sitemap: https://canadajobbank.org/sitemap.xml