# robots.txt for the main shopping site: allow access to almost everything.
#
# NOTE: robots.txt is line-oriented. Every directive must sit on its own
# line; if the file is collapsed onto a single line starting with '#', the
# whole file is parsed as one comment and no rule takes effect.

# Discovery Engine uses a crippling DoS-style crawl; block it entirely.
User-agent: discobot
Disallow: /

# Block ip-web-crawler (a copyright-detection crawler) on all pages.
User-agent: ip-web-crawler.com
Disallow: /

# Our own spider gets its own rules: everything except the checkout flow.
User-agent: miessence-spider
Disallow: /shop/en/checkout
Disallow: /shop/ja/checkout

# All other crawlers.
# URLs are now prefixed with the shop name, so block everything except the
# company site (/shop/*), unless it's one of the "about your rep" pages.
#
# Rules are listed most-specific first. Longest-match parsers (RFC 9309,
# Google) ignore order, but first-match parsers would otherwise stop at the
# broad "Allow: /shop" and never see the narrower Disallow rules below it.
User-agent: *
# The join-options pages live under checkout but should stay crawlable.
Allow: /shop/en/checkout/join-options
Allow: /shop/ja/checkout/join-options
Disallow: /shop/en/checkout
Disallow: /shop/ja/checkout
Disallow: /shop/en/product/5*
Disallow: /shop/en/offers
Disallow: /shop/ja/offers
# Site root only ('$' anchors the end of the URL path).
Allow: /$
Allow: /sitemap.xml
Allow: /shop
# Rep pages under any shop-name prefix. The pattern starts with '/' because
# RFC 9309 requires path patterns to begin with a slash.
Allow: /*/ourCompany/yourRep
# Everything not explicitly allowed above is blocked.
Disallow: /

Sitemap: http://www.miessence.com/sitemap.xml