#
# robots.txt for NeworMedia.com
# Updated: 27 February 2026
#
# Purpose:
#   Controls access for web crawlers, search engines, and AI agents.
#   Complements llms.txt to guide LLMs on content access and usage.
#
# Format notes:
#   - One record per line. A line that starts with "#" is a comment in its
#     entirety, so directives must never share a line with a comment or with
#     each other.
#   - No blank lines are used inside a group; some older parsers treat a
#     blank line as the end of the group.
#
# Learn more: https://www.robotstxt.org/robotstxt.html
#

# ---------------------------------------------------------------------------
# Default group: applies to every crawler that does not match a more
# specific User-agent group further down.
# ---------------------------------------------------------------------------
User-agent: *
# Crawl-delay is a non-standard extension; crawlers that do not support it
# ignore it.
Crawl-delay: 10
#
# --- Allow: Essential assets for rendering ---
# These patterns are longer (more specific) than the directory-level
# Disallow rules below, so they take precedence for matching asset URLs.
Allow: /misc/*.css$
Allow: /misc/*.css?
Allow: /misc/*.js$
Allow: /misc/*.js?
Allow: /misc/*.gif
Allow: /misc/*.jpg
Allow: /misc/*.jpeg
Allow: /misc/*.png
Allow: /modules/*.css$
Allow: /modules/*.css?
Allow: /modules/*.js$
Allow: /modules/*.js?
Allow: /modules/*.gif
Allow: /modules/*.jpg
Allow: /modules/*.jpeg
Allow: /modules/*.png
Allow: /profiles/*.css$
Allow: /profiles/*.css?
Allow: /profiles/*.js$
Allow: /profiles/*.js?
Allow: /profiles/*.gif
Allow: /profiles/*.jpg
Allow: /profiles/*.jpeg
Allow: /profiles/*.png
Allow: /themes/*.css$
Allow: /themes/*.css?
Allow: /themes/*.js$
Allow: /themes/*.js?
Allow: /themes/*.gif
Allow: /themes/*.jpg
Allow: /themes/*.jpeg
Allow: /themes/*.png
#
# --- Disallow: Internal and system directories ---
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /profiles/
Disallow: /scripts/
Disallow: /themes/
#
# --- Disallow: Sensitive or system files ---
Disallow: /CHANGELOG.txt
Disallow: /cron.php
Disallow: /INSTALL.mysql.txt
Disallow: /INSTALL.pgsql.txt
Disallow: /INSTALL.sqlite.txt
Disallow: /install.php
Disallow: /INSTALL.txt
Disallow: /LICENSE.txt
Disallow: /MAINTAINERS.txt
Disallow: /update.php
Disallow: /UPGRADE.txt
Disallow: /xmlrpc.php
#
# --- Disallow: Backend and user paths (clean URLs) ---
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /filter/tips/
Disallow: /node/add/
Disallow: /search/
Disallow: /user/register/
Disallow: /user/password/
Disallow: /user/login/
Disallow: /user/logout/
#
# --- Disallow: Backend and user paths (no clean URLs) ---
Disallow: /?q=admin/
Disallow: /?q=comment/reply/
Disallow: /?q=filter/tips/
Disallow: /?q=node/add/
Disallow: /?q=search/
Disallow: /?q=user/password/
Disallow: /?q=user/register/
Disallow: /?q=user/login/
Disallow: /?q=user/logout/
#
# --- AI / LLM Directives ---
# Public blog and educational resources may be crawled.
Allow: /blog/
Allow: /resources/
# Restricted business-sensitive areas
Disallow: /dashboard/
Disallow: /api/
Disallow: /partners/
Disallow: /publisher-data/
Disallow: /case-studies/

# ---------------------------------------------------------------------------
# --- Explicit LLM & AI Crawler Permissions ---
# These AI crawlers are permitted to access public content
# in accordance with https://www.newormedia.com/llms.txt
#
# NOTE(review): a crawler that matches one of the named groups below obeys
# ONLY that group and ignores the "User-agent: *" group above. With
# "Allow: /" these crawlers are therefore not bound by the Disallow rules for
# /dashboard/, /api/, /partners/, /publisher-data/, /case-studies/ or the
# system paths. Confirm this is intended; if not, repeat the relevant
# Disallow lines inside each group.
# ---------------------------------------------------------------------------
User-agent: GPTBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: Google-Extended
Allow: /

User-agent: ClaudeBot
Allow: /

User-agent: OmgiliBot
Allow: /

User-agent: FacebookBot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: BingPreview
Allow: /

# --- Sitemap & AI Policy Reference ---
Sitemap: https://www.newormedia.com/sitemap.xml
# AI-Policy is not a standard robots.txt field; parsers that do not recognise
# it ignore the line. It is kept as a human/agent-readable pointer.
AI-Policy: https://www.newormedia.com/llms.txt