# SciSummary robots.txt
# Last updated: 2025-01-17
# Optimized with comprehensive AI crawler directives
#
# Format reminder: robots.txt is line-oriented. Every directive must sit on
# its own line, and a crawler obeys ONLY the most specific User-agent group
# that matches it. A bot with its own group below therefore ignores the
# "User-agent: *" group entirely, so each group repeats the rules it needs.

# Default rules for all crawlers
User-agent: *
# Allow public content
Allow: /
Allow: /blog
Allow: /blog/*
Allow: /how-to-use
Allow: /how-to-use/*
Allow: /about
Allow: /affiliates
Allow: /privacy-policy
Allow: /terms-of-service
Allow: /register
Allow: /login
Allow: /ai
Allow: /for-ai
Allow: /research-paper-mcp-server
# Allow API documentation (critical for AI discovery)
Allow: /api-documentation
Allow: /api/pricing
# Block private/authenticated areas
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /summarize
Disallow: /abstracts/*
Disallow: /ref/*
Disallow: /logged-out
# Block actual API endpoints (usage, not documentation)
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# AI-specific crawlers - allow all public content
#
# NOTE(review): unlike the default group, the vendor AI groups below do not
# list "Disallow: /summarize" or "Disallow: /abstracts/*", so those bots may
# fetch both paths. Only the omgili, kangaroo_bot and CCBot groups repeat
# them. Confirm this is intentional; if not, add both lines to each group.

# OpenAI GPT
User-agent: GPTBot
User-agent: ChatGPT-User
Allow: /
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Anthropic Claude (all variants)
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: anthropic-ai
User-agent: Anthropic
Allow: /
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Google AI (Bard/Gemini)
User-agent: Google-Extended
Allow: /
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Perplexity AI
User-agent: PerplexityBot
Allow: /
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Apple Intelligence
User-agent: Applebot-Extended
Allow: /
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Cohere AI
User-agent: cohere-ai
Allow: /
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Meta AI (LLaMA)
User-agent: FacebookBot
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
Allow: /
Allow: /spec/
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Amazon AI (Alexa)
User-agent: Amazonbot
Allow: /
Allow: /spec/
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Bytedance AI (Bytespider)
User-agent: Bytespider
Allow: /
Allow: /spec/
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Diffbot (AI web scraping)
User-agent: Diffbot
Allow: /
Allow: /spec/
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Omgili (web content bot)
User-agent: omgili
User-agent: omgilibot
Allow: /
Allow: /spec/
Allow: /blog
Allow: /blog/*
Allow: /how-to-use
Allow: /how-to-use/*
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /summarize
Disallow: /abstracts/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# YouBot (You.com AI search)
User-agent: YouBot
Allow: /
Allow: /spec/
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# AI2Bot (Allen Institute for AI)
User-agent: AI2Bot
Allow: /
Allow: /spec/
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Kangaroo Bot (AI training)
User-agent: kangaroo_bot
Allow: /
Allow: /spec/
Allow: /blog
Allow: /blog/*
Allow: /how-to-use
Allow: /how-to-use/*
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /summarize
Disallow: /abstracts/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Scrapy (commonly used for AI data collection)
User-agent: Scrapy
Disallow: /

# Generic AI scrapers to block (aggressive or unknown crawlers)
User-agent: PetalBot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

# Common Crawl (used by many AI models for training)
User-agent: CCBot
Allow: /
Allow: /blog
Allow: /blog/*
Allow: /how-to-use
Allow: /how-to-use/*
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /summarize
Disallow: /abstracts/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*
Crawl-delay: 1

# Mistral AI
User-agent: MistralBot
Allow: /
Allow: /spec/
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Inflection AI (Pi)
User-agent: InflectionBot
Allow: /
Allow: /spec/
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Stability AI
User-agent: StabilityBot
Allow: /
Allow: /spec/
Allow: /api-documentation
Disallow: /dashboard
Disallow: /dashboard/*
Disallow: /request/*
Disallow: /profile
Disallow: /profile/*
Disallow: /ref/*
Disallow: /logged-out
Disallow: /api/tokens
Disallow: /api/tokens/*
Disallow: /api/batches
Disallow: /api/batches/*
Disallow: /api/batch
Disallow: /api/batch/*

# Sitemap location
Sitemap: https://scisummary.com/sitemap.xml

# Additional notes:
# - All allowed AI crawlers have access to public content and API documentation
# - Dashboard, profile, request, ref and logged-out areas are blocked in every
#   group; /summarize and /abstracts/* are blocked only in the default, omgili,
#   kangaroo_bot and CCBot groups
# - API endpoints for token/batch operations are blocked to prevent misuse
# - Aggressive scrapers without clear AI purposes are completely blocked