# robots.txt -- crawler access policy for this site.
#
# Format reminder: the file is line-oriented. Each directive must be on its
# own line, and a '#' starts a comment that runs to the end of the line.
# Rules are organised in groups: one or more "User-agent" lines followed by
# the rules that apply to those agents.

# ***************************************************** #
# Site rippers / offline downloaders: keep them out of everything.
# ***************************************************** #
User-agent: teleport
Disallow: /

User-agent: wget
Disallow: /

User-agent: webzip
Disallow: /

User-agent: webcopier
Disallow: /

User-agent: offline
Disallow: /

User-agent: webbandit
Disallow: /

User-agent: webstripper
Disallow: /

User-agent: sitesnagger
Disallow: /

User-agent: website
Disallow: /

User-agent: teleportpro
Disallow: /

# *********************************************** #
# Mail collectors: keep them out of everything.
# *********************************************** #
User-agent: crescent
Disallow: /

User-agent: emailsiphon
Disallow: /

User-agent: extractorpro
Disallow: /

User-agent: emailcollector
Disallow: /

User-agent: emailwolf
Disallow: /

User-agent: nicerspro
Disallow: /

# ************************************************** #
# Other specific robot: keep it out of everything.
# ************************************************** #
# Astalavista's robot.
User-agent: scooter
Disallow: /

# ************************ #
# Other "good" web spiders:
# ************************ #
# Matches every bot that is not named in a group above.
User-agent: *
# Number of seconds a crawler should wait between successive requests to
# this server. Non-standard directive: crawlers that do not support it
# ignore it.
Crawl-delay: 10
# Only visit between 00:00 and 12:00 UTC (GMT). Non-standard directive:
# crawlers that do not support it ignore it.
Visit-time: 0000-1200
# Now keep them out of:
Disallow: /META-INF
Disallow: /WEB-INF
Disallow: /robots.txt
Disallow: /backoffice
Disallow: /templates/common/css
Disallow: /templates/common/js
# NOTE(review): "framworks" may be a typo for "frameworks" -- confirm against
# the real directory name before changing it; the rule only matches as spelled.
Disallow: /templates/common/framworks
Disallow: /ajax
Disallow: /emailing/redirection
Disallow: /showarticlefile
# No trailing slash on the module paths below: the rule is a prefix match, so
# it blocks /module/addtobookmarks, /module/addtobookmarks/ and anything that
# may follow in the URL.
Disallow: /module/addtobookmarks
Disallow: /module/addtomyalerts
Disallow: /module/impression
Disallow: /module/lastviewedarticles
Disallow: /module/recommendthispublication
Disallow: /module/sendtoafriend