#
# robots.txt
#
# This file asks crawlers not to fetch certain parts of the site.
# Note: robots.txt controls crawling only; it does not by itself keep a URL
# out of a search index.
# Place at the root of your host: https://www.hee.nhs.uk/robots.txt
#
# Each directive must be on its own line: a '#' comments out the remainder
# of the line it appears on.
#

# ----------------------------------------------------
# Allow mainstream search engines basic access
# ----------------------------------------------------
User-agent: *
# Crawl-delay is a per-group directive, so it is declared here to apply to
# every crawler that is not matched by a more specific group below.
# Not all crawlers honour it.
Crawl-delay: 5
# CSS, JS, Images
Allow: /core/*.css$
Allow: /core/*.css?
Allow: /core/*.js$
Allow: /core/*.js?
Allow: /core/*.gif
Allow: /core/*.jpg
Allow: /core/*.jpeg
Allow: /core/*.png
Allow: /core/*.svg
Allow: /profiles/*.css$
Allow: /profiles/*.css?
Allow: /profiles/*.js$
Allow: /profiles/*.js?
Allow: /profiles/*.gif
Allow: /profiles/*.jpg
Allow: /profiles/*.jpeg
Allow: /profiles/*.png
Allow: /profiles/*.svg
# Directories
Disallow: /core/
Disallow: /profiles/
# Files
Disallow: /README.md
Disallow: /composer/Metapackage/README.txt
Disallow: /composer/Plugin/ProjectMessage/README.md
Disallow: /composer/Plugin/Scaffold/README.md
Disallow: /composer/Plugin/VendorHardening/README.txt
Disallow: /composer/Template/README.txt
Disallow: /modules/README.txt
Disallow: /sites/README.txt
Disallow: /themes/README.txt
# Paths (clean URLs)
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /filter/tips
Disallow: /node/add/
Disallow: /search/
Disallow: /user/register
Disallow: /user/password
Disallow: /user/login
Disallow: /user/logout
Disallow: /media/oembed
Disallow: /*/media/oembed
# Paths (no clean URLs)
Disallow: /index.php/admin/
Disallow: /index.php/comment/reply/
Disallow: /index.php/filter/tips
Disallow: /index.php/node/add/
Disallow: /index.php/search/
Disallow: /index.php/user/password
Disallow: /index.php/user/register
Disallow: /index.php/user/login
Disallow: /index.php/user/logout
Disallow: /index.php/media/oembed
Disallow: /index.php/*/media/oembed
# Extra patterns
Disallow: /search
Disallow: /search?
Disallow: /*?amp

# ----------------------------------------------------
# Block AI/data-scraping bots (explicit block per bot)
# ----------------------------------------------------
User-agent: AddSearchBot
Disallow: /

User-agent: AI2Bot
Disallow: /

User-agent: Ai2Bot-Dolma
Disallow: /

User-agent: aiHitBot
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: Andibot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Applebot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Awario
Disallow: /

User-agent: bedrockbot
Disallow: /

User-agent: bigsur.ai
Disallow: /

User-agent: Brightbot 1.0
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ChatGPT Agent
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: Claude-SearchBot
Disallow: /

User-agent: Claude-User
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudVertexBot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: cohere-training-data-crawler
Disallow: /

User-agent: Cotoyogi
Disallow: /

User-agent: Crawlspace
Disallow: /

User-agent: Datenbank Crawler
Disallow: /

User-agent: Devin
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: DuckAssistBot
Disallow: /

User-agent: Echobot Bot
Disallow: /

User-agent: EchoboxBot
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: facebookexternalhit
Disallow: /

User-agent: Factset_spyderbot
Disallow: /

User-agent: FirecrawlAgent
Disallow: /

User-agent: FriendlyCrawler
Disallow: /

User-agent: Gemini-Deep-Research
Disallow: /

User-agent: Google-CloudVertexBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: Google-Firebase
Disallow: /

User-agent: GoogleAgent-Mariner
Disallow: /

User-agent: GoogleOther
Disallow: /

User-agent: GoogleOther-Image
Disallow: /

User-agent: GoogleOther-Video
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: iaskspider/2.0
Disallow: /

User-agent: ICC-Crawler
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: img2dataset
Disallow: /

User-agent: ISSCyberRiskCrawler
Disallow: /

User-agent: Kangaroo Bot
Disallow: /

User-agent: LinerBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: meta-externalfetcher
Disallow: /

User-agent: Meta-ExternalFetcher
Disallow: /

User-agent: meta-webindexer
Disallow: /

User-agent: MistralAI-User
Disallow: /

User-agent: MistralAI-User/1.0
Disallow: /

User-agent: MyCentralAIScraperBot
Disallow: /

User-agent: netEstate Imprint Crawler
Disallow: /

User-agent: NovaAct
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

User-agent: omgili
Disallow: /

User-agent: omgilibot
Disallow: /

User-agent: OpenAI
Disallow: /

User-agent: Operator
Disallow: /

User-agent: PanguBot
Disallow: /

User-agent: Panscient
Disallow: /

User-agent: panscient.com
Disallow: /

User-agent: Perplexity-User
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: PhindBot
Disallow: /

User-agent: Poseidon Research Crawler
Disallow: /

User-agent: QualifiedBot
Disallow: /

User-agent: QuillBot
Disallow: /

User-agent: quillbot.com
Disallow: /

User-agent: SBIntuitionsBot
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: SemrushBot-OCOB
Disallow: /

User-agent: SemrushBot-SWA
Disallow: /

User-agent: ShapBot
Disallow: /

User-agent: Sidetrade indexer bot
Disallow: /

User-agent: TerraCotta
Disallow: /

User-agent: Thinkbot
Disallow: /

User-agent: TikTokSpider
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: WARDBot
Disallow: /

User-agent: Webzio-Extended
Disallow: /

User-agent: wpbot
Disallow: /

User-agent: YaK
Disallow: /

User-agent: YandexAdditional
Disallow: /

User-agent: YandexAdditionalBot
Disallow: /

User-agent: YouBot
Disallow: /

# ----------------------------------------------------
# Sitemap (applies to all crawlers, independent of any group)
# ----------------------------------------------------
Sitemap: https://www.hee.nhs.uk/sitemap.xml