Search

Matched domain: shriker.ca

`robots.txt`

# robots.txt for shriker.ca
# Note: robots.txt is advisory; well-behaved crawlers will follow it, others may not.
# This file blocks user-agents that self-identify as AI / LLM indexing crawlers.

User-agent: OpenAI
Disallow: /

# OpenAI's data-collection crawler
User-agent: GPTBot
Disallow: /

# OpenAI's retrieval agent for user browsing
User-agent: ChatGPT-User
Disallow: /

User-agent: Perplexity
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: Anthropic
Disallow: /

User-agent: Abridge
Disallow: /

# Google's opt-out mechanism for AI training
User-agent: Google-Extended
Disallow: /

# Amazon uses web data for multiple AI products
User-agent: Amazonbot
Disallow: /

# Anthropic's crawler
User-agent: ClaudeBot
Disallow: /

# Crawler used by Omgili/Similarweb, data is often re-sold
User-agent: Omgilibot
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: Meta-ExternalCrawler
Disallow: /

# Apple's AI-training extension to Applebot
User-agent: Applebot-Extended
Disallow: /

# Common Crawl (feeds many LLMs)
User-agent: CCBot
Disallow: /

# Default rule: every crawler not listed above (including major search engines) may access the whole site
User-agent: *
Allow: /

# Generated sitemap 
Sitemap: https://shriker.ca/sitemap.xml

Look up this url in the url tool:
https://shriker.ca/.well-known/acme-challenge: 404 text/html; charset=utf-8
https://shriker.ca/.well-known/csvm: 404 text/html; charset=utf-8
https://shriker.ca/.well-known/nostr.json: 404 text/html; charset=utf-8
https://shriker.ca/.well-known/security.txt: 404 text/html; charset=utf-8
https://shriker.ca/.well-known/traffic-advice: 404 text/html; charset=utf-8