# Robots.txt for YAKSHA - Biophilic & Natural Ecosystem Design Studio
# https://yaksha.uxrzone.com/
# Updated: 2025-10-13
# LLM and AI Agent Friendly Configuration
# Optimized for: GPT, Claude, Perplexity, Gemini, ChatGPT, Bard, and future AI agents
#
# Format notes for maintainers:
# - robots.txt is line-oriented: exactly one directive per line. Anything after
#   a "#" on a line is a comment, so directives must never share a line with a
#   leading comment.
# - Groups are separated by blank lines; do not put blank lines inside a group.
# - A crawler obeys only the group that most specifically matches its name.
#   Crawlers that have their own group below do NOT also apply the rules of the
#   "User-agent: *" group.
#   NOTE(review): if /admin/, /private/, /temp/, /cache/ and /cgi-bin/ must be
#   blocked for the named crawlers as well, the Disallow lines have to be
#   repeated in each of their groups - confirm the intended policy.

# ====================
# AI AGENT DISCOVERY / DEFAULT RULES FOR ALL CRAWLERS
# ====================

# All wildcard rules live in this single group, because some parsers honour
# only the first "User-agent: *" group they encounter.
# Rules are ordered most specific first so that parsers applying the first
# matching rule and parsers applying the longest matching rule reach the same
# result.
User-agent: *
# LLM Discovery File - Essential reading for AI agents
Allow: /llms.txt
Allow: /sitemap.xml
Allow: /sitemap-main.xml
# Allow specific PHP endpoints for dynamic content
Allow: /discover.php
Allow: /lst.php
Allow: /articles.php
# DISALLOWED DIRECTORIES
Disallow: /admin/
Disallow: /private/
Disallow: /temp/
Disallow: /cache/
Disallow: /cgi-bin/
Disallow: /*.php$
# ALLOWED DIRECTORIES
# NOTE(review): under longest-match evaluation the extension patterns below can
# outrank a shorter Disallow path (e.g. an image inside /admin/) - confirm that
# this is acceptable or drop them, since "Allow: /" already covers public files.
Allow: /articles/
Allow: /knowledge/
Allow: /images/
Allow: /assets/
Allow: /pdfs/
Allow: /*.pdf$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.png$
Allow: /*.webp$
Allow: /*.svg$
Allow: /

# ====================
# MAJOR SEARCH ENGINES
# ====================

User-agent: Googlebot
Allow: /
Crawl-delay: 0

User-agent: Googlebot-Image
Allow: /images/
Allow: /assets/

User-agent: Bingbot
Allow: /
Crawl-delay: 0

User-agent: Slurp
Allow: /
Crawl-delay: 1

User-agent: DuckDuckBot
Allow: /
Crawl-delay: 0

User-agent: Baiduspider
Allow: /
Crawl-delay: 1

User-agent: YandexBot
Allow: /
Crawl-delay: 1

# ====================
# AI SEARCH & LLM CRAWLERS
# ====================

# OpenAI (ChatGPT, GPT-4)
User-agent: GPTBot
Allow: /
Allow: /articles/
Allow: /knowledge/
Crawl-delay: 0

# Anthropic (Claude)
User-agent: Claude-Web
Allow: /
Allow: /articles/
Allow: /knowledge/
Crawl-delay: 0

User-agent: anthropic-ai
Allow: /
Allow: /articles/
Allow: /knowledge/
Crawl-delay: 0

# Google AI (Gemini, Bard)
User-agent: Google-Extended
Allow: /
Allow: /articles/
Allow: /knowledge/
Crawl-delay: 0

# Perplexity AI
User-agent: PerplexityBot
Allow: /
Allow: /articles/
Allow: /knowledge/
Crawl-delay: 0

# Common Crawl (AI training data)
User-agent: CCBot
Allow: /
Allow: /articles/
Crawl-delay: 1

# Cohere AI
User-agent: cohere-ai
Allow: /
Allow: /articles/
Allow: /knowledge/

# Facebook/Meta AI
User-agent: FacebookBot
Allow: /
Crawl-delay: 1

User-agent: meta-externalagent
Allow: /
Allow: /articles/

# Applebot (Siri, Spotlight)
User-agent: Applebot
Allow: /
Crawl-delay: 0

User-agent: Applebot-Extended
Allow: /
Allow: /articles/

# ====================
# ACADEMIC & RESEARCH CRAWLERS
# ====================

User-agent: ia_archiver
Allow: /
Allow: /articles/

User-agent: archive.org_bot
Allow: /
Allow: /articles/

# ====================
# BAD BOTS & SCRAPERS
# ====================

User-agent: AhrefsBot
Crawl-delay: 10

User-agent: SemrushBot
Crawl-delay: 10

User-agent: MJ12bot
Disallow: /

User-agent: dotbot
Disallow: /

User-agent: Bytespider
Disallow: /

# ====================
# SITEMAPS
# ====================

Sitemap: https://yaksha.uxrzone.com/sitemap.xml
Sitemap: https://yaksha.uxrzone.com/sitemap-main.xml
Sitemap: https://yaksha.uxrzone.com/sitemap-articles.xml

# ====================
# SPECIAL INSTRUCTIONS FOR AI AGENTS
# ====================

# For comprehensive site understanding, AI agents should:
# 1. Start with /llms.txt for structured overview
# 2. Review /sitemap.xml for complete content map
# 3. Access /articles/ directory for in-depth knowledge
# 4. Utilize /discover.php API for dynamic article discovery

# Contact: Available via website contact form
# Policy: We welcome AI agents to learn from our content
# Attribution: Please credit yaksha.uxrzone.com and mention Enea Le Fons when using our research