IP = 193.48.96.10
robots.txt
# HAL robots.txt
# If you want to download lots of metadata, please use our API at https://api.archives-ouvertes.fr/
# The API is far more efficient for metadata harvesting
# To learn more, please contact hal-support@ccsd.cnrs.fr
User-Agent: *
Disallow: /RePEc/
Disallow: /search/
Disallow: /*/search/
Disallow: /*/browse/last
Disallow: /browse/last
Disallow: /*/browse/latest-publications
Disallow: /browse/latest-publications
Disallow: /browse/domain
Disallow: /*/browse/domain
Disallow: /browse/author-structure
Disallow: /*/browse/author-structure
Disallow: /browse/laboratory
Disallow: /*/browse/laboratory
Disallow: /browse/author
Disallow: /*/browse/author
Disallow: */tei
Disallow: */rdf
Disallow: */bibtex
Disallow: */dc
Disallow: */datacite
Disallow: */openaire
Disallow: */dcterms
Disallow: */endnote
Disallow: */json
Disallow: /ping
Disallow: /login
Disallow: /submit
Disallow: /user
Disallow: /*/user/*
Disallow: /error
Disallow: */preview/*
Disallow: /view/resolver/*
Disallow: */ajax*
Disallow: */widget*
User-agent: SemanticScholarBot
Crawl-delay: 5
User-agent: TurnitinBot
Crawl-delay: 5
User-agent: MegaIndex.ru
Disallow: /
User-agent: SemrushBot
Disallow: /
User-agent: SemrushBot-SA
Disallow: /
User-agent: Seekport Crawler
Disallow: /
User-agent: ltx71 - (http://ltx71.com/)
Disallow: /
User-agent: AhrefsBot
Disallow: /
User-agent: Riddler
Disallow: /
User-agent: CCBot
Disallow: /
User-agent: ChatGPT-User
Disallow: /
User-agent: Diffbot
Disallow: /
User-agent: GPTBot
Disallow: /
User-agent: Google-Extended
Disallow: /
User-agent: Omgilibot
Disallow: /
User-agent: omgili
Disallow: /
User-agent: FacebookBot
Disallow: /
User-agent: cohere-ai
Disallow: /
User-agent: anthropic-ai
Disallow: /
User-agent: Bytespider
Disallow: /
User-agent: Amazonbot
Disallow: /
User-agent: Applebot
Disallow: /
User-agent: Applebot-Extended
Disallow: /
User-agent: PerplexityBot
Disallow: /
User-agent: YouBot
Disallow: /
User-agent: AdsBot-Google
Disallow: /
User-agent: AwarioRssBot
Disallow: /
User-agent: AwarioSmartBot
Disallow: /
User-agent: ClaudeBot
Disallow: /
User-agent: Claude-Web
Disallow: /
User-agent: DataForSeoBot
Disallow: /
User-agent: FriendlyCrawler
Disallow: /
User-agent: GoogleOther
Disallow: /
User-agent: ImagesiftBot
Disallow: /
User-agent: magpie-crawler
Disallow: /
User-agent: Meltwater
Disallow: /
User-agent: peer39_crawler
Disallow: /
User-agent: PiplBot
Disallow: /
User-agent: Seekr
Disallow: /
User-agent: Meta-ExternalAgent
Disallow: /
User-agent: Timpibot
Disallow: /
User-agent: SummalyBot
Disallow: /
User-agent: facebookexternalhit
Disallow: /
User-agent: Google-Extended
Disallow: /
User-agent: ICC-Crawler
Disallow: /
User-agent: img2dataset
Disallow: /
User-agent: OAI-SearchBot
Disallow: /
User-agent: Omgilibot
Disallow: /
User-agent: PetalBot
Disallow: /
User-agent: Scrapy
Disallow: /
User-agent: VelenPublicWebCrawler
Disallow: /
# Sitemap
Sitemap: http://hal.science/robots/sitemap
Look up this URL in the URL tool
https://hal.archives-ouvertes.fr/.well-known/acme-challenge: 404 text/html; charset=UTF-8
https://hal.archives-ouvertes.fr/.well-known/csvm: 404 text/html; charset=UTF-8
https://hal.archives-ouvertes.fr/.well-known/nostr.json: 404 text/html; charset=UTF-8
https://hal.archives-ouvertes.fr/.well-known/security.txt: 404 text/html; charset=UTF-8
https://hal.archives-ouvertes.fr/.well-known/traffic-advice: 404 text/html; charset=UTF-8