ž ¨ÿfõc@sddZddlZddlZdgZGdd„dƒZGdd„dƒZGdd„dƒZdS( u< robotparser.py Copyright (C) 2000 Bastian Kleineidam You can choose between two licenses when using this package: 1) GNU GPLv2 2) PSF license for Python 2.2 The robots.txt Exclusion Protocol is implemented as specified in http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html iNuRobotFileParsercBs‰|EeZdZdZddd„Zdd„Zdd„Zd d „Zd d „Zd d„Z dd„Z dd„Z dd„Z dS(uRobotFileParserus This class provides a set of methods to read, parse and answer questions about a single robots.txt file. ucCs>g|_d|_d|_d|_|j|ƒd|_dS(NiF(uentriesuNoneu default_entryuFalseu disallow_allu allow_alluset_urlu last_checked(uselfuurl((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__init__s      uRobotFileParser.__init__cCs|jS(u·Returns the time the robots.txt file was last fetched. This is useful for long-running web spiders that need to check for new robots.txt files periodically. (u last_checked(uself((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyumtimesuRobotFileParser.mtimecCsddl}|jƒ|_dS(uYSets the time the robots.txt file was last fetched to the current time. iN(utimeu last_checked(uselfutime((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyumodified(s uRobotFileParser.modifiedcCs5||_tjj|ƒdd…\|_|_dS(u,Sets the URL referring to a robots.txt file.iiN(uurluurllibuparseuurlparseuhostupath(uselfuurl((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuset_url0s uRobotFileParser.set_urlcCs©ytjj|jƒ}Wnatjjk r|}z;|jdkrOd|_n|jdkrjd|_ nWYdd}~Xn)X|j ƒ}|j |j dƒj ƒƒdS(u4Reads the robots.txt URL and feeds it to the parser.i‘i“iNuutf-8(i‘i“T(uurlliburequestuurlopenuurluerroru HTTPErrorucodeuTrueu disallow_allu allow_allureaduparseudecodeu splitlines(uselfufuerruraw((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuread5s  uRobotFileParser.readcCsAd|jkr-|jdkr=||_q=n|jj|ƒdS(Nu*(u useragentsu default_entryuNoneuentriesuappend(uselfuentry((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu _add_entryBsuRobotFileParser._add_entrycCsd}tƒ}xã|D]Û}|sn|dkr@tƒ}d}qn|dkrn|j|ƒtƒ}d}qnn|jdƒ}|dkrœ|d|…}n|jƒ}|s´qn|jddƒ}t|ƒdkr|djƒjƒ|d–s u+RobotFileParser.__str__..(ujoinuentries(uself((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__str__•suRobotFileParser.__str__N( u__name__u __module__u __qualname__u__doc__u__init__umtimeumodifieduset_urlureadu _add_entryuparseu can_fetchu__str__(u __locals__((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuRobotFileParsers   3 cBs>|EeZdZdZdd„Zdd„Zdd„ZdS( uRuleLineuoA rule line is a single "Allow:" (allowance==True) or "Disallow:" (allowance==False) followed by a path.cCs\|dkr| rd}ntjjtjj|ƒƒ}tjj|ƒ|_||_dS(NuT(uTrueuurllibuparseu urlunparseuurlparseuquoteupathu allowance(uselfupathu allowance((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__init__œs  uRuleLine.__init__cCs|jdkp|j|jƒS(Nu*(upathu startswith(uselfufilename((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu applies_to¤suRuleLine.applies_tocCs|jrdpdd|jS(NuAllowuDisallowu: (u allowanceupath(uself((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__str__§suRuleLine.__str__N(u__name__u __module__u __qualname__u__doc__u__init__u applies_tou__str__(u __locals__((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuRuleLine™s  uRuleLinecBsJ|EeZdZdZdd„Zdd„Zdd„Zdd „Zd S( uEntryu?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_dS(N(u useragentsu rulelines(uself((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__init__­s uEntry.__init__cCsjg}x'|jD]}|jd|dgƒqWx*|jD]}|jt|ƒdgƒq:Wdj|ƒS(Nu User-agent: u u(u useragentsuextendu rulelinesustrujoin(uselfuretuagentuline((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu__str__±s u Entry.__str__cCs]|jdƒdjƒ}x=|jD]2}|dkr9dS|jƒ}||kr#dSq#WdS(u2check if this entry applies to the specified agentu/iu*TF(usplituloweru useragentsuTrueuFalse(uselfu useragentuagent((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu applies_to¹s   uEntry.applies_tocCs.x'|jD]}|j|ƒr |jSq WdS(uZPreconditions: - our agent applies to this entry - filename is URL decodedT(u rulelinesu applies_tou allowanceuTrue(uselfufilenameuline((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu allowanceÆs uEntry.allowanceN(u__name__u __module__u __qualname__u__doc__u__init__u__str__u applies_tou allowance(u __locals__((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyuEntry«s    uEntry(u__doc__u urllib.parseuurllibuurllib.requestu__all__uRobotFileParseruRuleLineuEntry(((u7/opt/alt/python33/lib64/python3.3/urllib/robotparser.pyu s  ˆ