## Default policy: applies to every crawler that has no more specific
## User-agent group in this file (e.g. Googlebot, Bingbot).
## Disallow is listed before the catch-all Allow so that parsers which apply
## the first matching rule (instead of the longest match) still block /admin/.
User-agent: *
Disallow: /admin/
Allow: /

## AI training crawlers -- opt out
## Each vendor's published documentation is understood to say these crawlers
## honor robots.txt as a training opt-out. NOTE(review): compliance is
## voluntary -- re-verify the vendor docs and user-agent tokens periodically.
## Keep this policy mirrored in /ai.txt. Blocking here is in addition to, not
## a substitute for, existing copyright protections.
## Each named group replaces the `User-agent: *` group for that crawler.

# OpenAI training
User-agent: GPTBot
Disallow: /

# Anthropic training (current + legacy tokens)
User-agent: ClaudeBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

# Google Gemini training opt-out signal
User-agent: Google-Extended
Disallow: /

# Apple Intelligence training opt-out signal
User-agent: Applebot-Extended
Disallow: /

# Common Crawl (frequently used as a training corpus source)
User-agent: CCBot
Disallow: /

# ByteDance / TikTok training
User-agent: Bytespider
Disallow: /

# Meta training crawlers
User-agent: Meta-ExternalAgent
Disallow: /

User-agent: FacebookBot
Disallow: /

# Cohere training
User-agent: cohere-ai
Disallow: /

# Webz.io / Omgili (crawl data is often resold as training data)
User-agent: omgili
Disallow: /

# Amazon
User-agent: Amazonbot
Disallow: /

## AI search / citation crawlers (search indexing and user-triggered fetches,
## not training) -- allow.
## A crawler obeys only the most specific User-agent group that matches it, so
## these groups do NOT inherit the `User-agent: *` rules. /admin/ is therefore
## disallowed again in every group; Disallow comes first for parsers that
## apply the first matching rule.

# OpenAI search + user-triggered fetch
User-agent: ChatGPT-User
Disallow: /admin/
Allow: /

User-agent: OAI-SearchBot
Disallow: /admin/
Allow: /

# Anthropic search + user-triggered fetch
User-agent: Claude-User
Disallow: /admin/
Allow: /

User-agent: Claude-SearchBot
Disallow: /admin/
Allow: /

# Perplexity citations
User-agent: PerplexityBot
Disallow: /admin/
Allow: /

## Sitemaps (absolute URLs; Sitemap lines are not tied to any User-agent group)
Sitemap: https://screenfine.info/sitemap-index.xml
Sitemap: https://screenfine.info/sitemap-posts.xml