a 5êdgV+ã@sddlZddlZddlZddlZddlZddlmZddlm Z ddl m Z m Z Gdd„dƒZ Gdd„de ejƒZGd d „d e ejƒZGd d „d e ejƒZGd d„de ejƒZGdd„de ƒZGdd„deejƒZGdd„deejƒZGdd„deƒZGdd„de ejƒZGdd„de ejƒZGdd„de ejƒZGdd„de ejƒZGdd „d eƒZGd!d"„d"e ejƒZGd#d$„d$e ejƒZGd%d&„d&e ejƒZGd'd(„d(e ejƒZGd)d*„d*eejƒZ Gd+d,„d,e ejƒZ!Gd-d.„d.e ƒZ"Gd/d0„d0ejƒZ#Gd1d2„d2ejƒZ$e%d3kre &¡dS)4éN)Úsupport)Ú socket_helper)ÚBaseHTTPRequestHandlerÚ HTTPServerc@sHeZdZdZdZgZgZdZdd„Zdd„Z dd „Z d d „Z d d „Z dS)Ú BaseRobotTestÚZtest_robotparserNcCs,t |j¡ ¡}tj ¡|_|j |¡dS©N) ÚioÚStringIOÚ robots_txtÚ readlinesÚurllibÚ robotparserÚRobotFileParserÚparserÚparse)ÚselfÚlines©rú1/usr/local/lib/python3.9/test/test_robotparser.pyÚsetUps zBaseRobotTest.setUpcCs$t|tƒr|\}}||fS|j|fSr)Ú isinstanceÚtupleÚagent©rÚurlrrrrÚget_agent_and_urls zBaseRobotTest.get_agent_and_urlc Cs`|jD]T}| |¡\}}|j||d$| |j ||¡¡Wdƒq1sP0YqdS©N)rr)ÚgoodrÚsubTestÚ assertTruerÚ can_fetchrrrrÚtest_good_urlss zBaseRobotTest.test_good_urlsc Cs`|jD]T}| |¡\}}|j||d$| |j ||¡¡Wdƒq1sP0YqdSr)ÚbadrrÚ assertFalserr!rrrrÚ test_bad_urls#s zBaseRobotTest.test_bad_urlscCs| |j ¡|j¡dSr)Ú assertEqualrÚ site_maps©rrrrÚtest_site_maps)szBaseRobotTest.test_site_maps) Ú__name__Ú __module__Ú __qualname__r rrr#r'rrr"r%r)rrrrr src@s eZdZdZddgZgd¢ZdS)ÚUserAgentWildcardTestz•User-agent: * Disallow: /cyberworld/map/ # This is an infinite virtual URL space Disallow: /tmp/ # these will soon disappear Disallow: /foo.html ú/ú /test.html)ú/cyberworld/map/index.htmlz/tmp/xxxú /foo.htmlN©r*r+r,r rr#rrrrr--sr-c@seZdZdZgd¢ZdgZdS)ÚCrawlDelayAndCustomAgentTestzå# robots.txt for http://www.example.com/ User-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ # This is an infinite virtual URL space # Cybermapper knows where to go. User-agent: cybermapper Disallow: )r.r/)Z cybermapperr0r0Nr2rrrrr38s r3c@s&eZdZdZddgZdgZddgZdS)Ú SitemapTesta# robots.txt for http://www.example.com/ User-agent: * Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml Sitemap: http://www.google.com/hostednews/sitemap_index.xml Request-rate: 3/15 Disallow: /cyberworld/map/ # This is an infinite virtual URL space r.r/r0z7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlN)r*r+r,r rr#r'rrrrr4Is  ÿr4c@seZdZdZgZgd¢ZdS)ÚRejectAllRobotsTestz(# go away User-agent: * Disallow: / )r0r.ú/tmp/Nr2rrrrr5Zsr5c@seZdZdZdZdd„ZdS)ÚBaseRequestRateTestNc CsÂ|j}|j|jD]ª}| |¡\}}|j||dz| | |¡|j¡| |¡}| ||j¡|jdurž| |t j j ¡| |j |jj ¡| |j |jj ¡Wdƒq1s²0YqdSr)rrr#rrr&Ú crawl_delayÚ request_rateZassertIsInstancer rÚ RequestRateZrequestsZseconds)rrrrZparsed_request_raterrrÚtest_request_ratehs(  þþþz%BaseRequestRateTest.test_request_rate)r*r+r,r9r8r;rrrrr7dsr7c@seZdZdZdgZdS)Ú EmptyFileTestrz/fooN)r*r+r,r rrrrrr<€sr<c@s4eZdZdZdZej dd¡ZdZ dgZ gd¢Z dS) ÚCrawlDelayAndRequestRateTestz’User-agent: figtree Crawl-delay: 3 Request-rate: 9/30 Disallow: /tmp Disallow: /a%3cd.html Disallow: /a%2fb.html Disallow: /%7ejoe/index.html Úfigtreeé éé)r>r1)ú/tmpz /tmp.htmlú /tmp/a.htmlú /a%3cd.htmlú /a%3Cd.htmlz /a%2fb.htmlz/~joe/index.htmlN) r*r+r,r rr rr:r9r8rr#rrrrr=…s  r=c@seZdZdZdS)ÚDifferentAgentTestzFigTree Robot libwww-perl/5.04N©r*r+r,rrrrrrF—srFc@s"eZdZdZdgZgd¢ZdZdS)ÚInvalidRequestRateTestzUser-agent: * Disallow: /tmp/ Disallow: /a%3Cd.html Disallow: /a/b.html Disallow: /%7ejoe/index.html Crawl-delay: 3 Request-rate: 9/banana rB)r6rCrDrEz /a/b.htmlz/%7Ejoe/index.htmlrAN)r*r+r,r rr#r8rrrrrH›s rHc@seZdZdZdgZgZdS)ÚInvalidCrawlDelayTestz2User-Agent: * Disallow: /. Crawl-delay: pears r1Nr2rrrrrI«srIc@s eZdZdZdZdgZdgZdS)ÚAnotherInvalidRequestRateTestzeUser-agent: Googlebot Allow: /folder1/myfile.html Disallow: /folder1/ Request-rate: whale/banana Ú Googlebotú/folder1/myfile.htmlú/folder1/anotherfile.htmlN©r*r+r,r rrr#rrrrrJ·srJc@seZdZdZdZdgZdS)ÚUserAgentOrderingTestzMUser-agent: Googlebot Disallow: / User-agent: Googlebot-Mobile Allow: / rKz/something.jpgN)r*r+r,r rr#rrrrrOÄsrOc@seZdZdZdS)ÚUserAgentGoogleMobileTestzGooglebot-MobileNrGrrrrrPÓsrPc@s eZdZdZdZdgZdgZdS)ÚGoogleURLOrderingTestzJUser-agent: Googlebot Allow: /folder1/myfile.html Disallow: /folder1/ Z googlebotrLrMNrNrrrrrQ×srQc@seZdZdZdgZdgZdS)ÚDisallowQueryStringTestz2User-agent: * Disallow: /some/path?name=value ú /some/pathz/some/path?name=valueNr2rrrrrRäsrRc@seZdZdZdgZdgZdS)ÚUseFirstUserAgentWildcardTestzNUser-agent: * Disallow: /some/path User-agent: * Disallow: /another/path z /another/pathrSNr2rrrrrTîsrTc@seZdZdZdgZdgZdS)ÚEmptyQueryStringTestz>User-agent: * Allow: /some/path? Disallow: /another/path? z /some/path?z/another/path?Nr2rrrrrUûsrUc@s0eZdZdZej dd¡ZdZddgZ dgZ dS) ÚDefaultEntryTestzOUser-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ rAéér.r/r0N) r*r+r,r r rr:r9r8rr#rrrrrVs rVc@seZdZdZdZdd„ZdS)ÚStringFormattingTestzÆUser-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ # This is an infinite virtual URL space # Cybermapper knows where to go. User-agent: cybermapper Disallow: /some/path zxUser-agent: cybermapper Disallow: /some/path User-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/cCs| t|jƒ|j¡dSr)r&ÚstrrÚexpected_outputr(rrrÚtest_string_formatting)sz+StringFormattingTest.test_string_formattingN)r*r+r,r r[r\rrrrrYs  rYc@seZdZdd„Zdd„ZdS)Ú RobotHandlercCs| dd¡dS)Ni“zForbidden access)Z send_errorr(rrrÚdo_GET/szRobotHandler.do_GETcGsdSrr)rÚformatÚargsrrrÚ log_message2szRobotHandler.log_messageN)r*r+r,r^rarrrrr]-sr]c@s*eZdZdd„Zdd„Zejdd„ƒZdS)ÚPasswordProtectedSiteTestCasecCsP| tjj¡ttjdftƒ|_t j d|jj ddid|_ d|j _ |j  ¡dS)NrzHTTPServer servingZ poll_intervalg{®Gáz„?)ÚnameÚtargetÚkwargsT)Z addCleanupr ZrequestÚ urlcleanuprrÚHOSTr]ÚserverÚ threadingÚThreadZ serve_foreverÚtÚdaemonÚstartr(rrrr8súz#PasswordProtectedSiteTestCase.setUpcCs"|j ¡|j ¡|j ¡dSr)rhÚshutdownrkÚjoinZ server_closer(rrrÚtearDownHs  z&PasswordProtectedSiteTestCase.tearDowncCs\|jj}dtjdt|dƒ}|d}tj ¡}| |¡|  ¡|  |  d|¡¡dS)Nzhttp://ú:rXz /robots.txtÚ*) rhZserver_addressrrgrZr rrZset_urlÚreadr$r!)rÚaddrrZ robots_urlrrrrÚtestPasswordProtectedSiteMs  z7PasswordProtectedSiteTestCase.testPasswordProtectedSiteN)r*r+r,rrprZ reap_threadsrurrrrrb6srbc@sFeZdZdZd e¡Zedd„ƒZdd„Zdd„Z d d „Z d d „Z d S)ÚNetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtcCsTt d¡t |j¡*tj |j¡|_ |j   ¡Wdƒn1sF0YdS)NZnetwork) rZrequiresrZtransient_internetÚbase_urlr rrr rrs)ÚclsrrrÚ setUpClass]s zNetworkTestCase.setUpClasscCs$d |j|tj |¡dsdnd¡S)Nz{}{}{}rXr.r)r_rwÚosÚpathÚsplitext)rr{rrrrdsÿzNetworkTestCase.urlcCsV| |jj¡| |jj¡| |j ¡d¡| |j d¡¡| |j d¡¡dS)Nrrr)r$rÚ disallow_allÚ allow_allZ assertGreaterÚmtimer8r9r(rrrÚ test_basicis zNetworkTestCase.test_basiccCs˜| |j d| d¡¡¡| |j d|j¡¡| |j d| d¡¡¡| |j d| d¡¡¡| |j d| d¡¡¡| |j d|j¡¡dS)NrrZ elsewhereZNutchZbrianZwebstats)r rr!rr$rwr(rrrÚtest_can_fetchps zNetworkTestCase.test_can_fetchcCsftj | d¡¡}| ¡| |j¡| |j¡|  |  ¡d¡|  |  d¡¡|  |  d¡¡dS)Nz i-robot.txtrrr)r rrrrsr r~r$r}r&rZ assertIsNoner8r9)rrrrrÚ test_read_404xs  zNetworkTestCase.test_read_404N) r*r+r,rwr_r Ú classmethodryrr€rr‚rrrrrvXs  rvÚ__main__)'r rzriZunittestZurllib.robotparserr ÚtestrZ test.supportrZ http.serverrrrZTestCaser-r3r4r5r7r<r=rFrHrIrJrOrPrQrRrTrUrVrYr]rbrvr*ÚmainrrrrÚs@  "          ")