
    0hhb>                        S SK r S SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SKJ
r
  S SKJrJr   " S S5      r " S S	\\R                  5      r " S
 S\\R                  5      r " S S\\R                  5      r " S S\\R                  5      r " S S\5      r " S S\\R                  5      r " S S\\R                  5      r " S S\5      r " S S\\R                  5      r " S S\\R                  5      r " S S\\R                  5      r " S S\\R                  5      r " S  S!\5      r " S" S#\\R                  5      r " S$ S%\\R                  5      r " S& S'\\R                  5      r " S( S)\\R                  5      r  " S* S+\\R                  5      r! " S, S-\\R                  5      r"\RF                  " \RH                  S.5       " S/ S05      5       r%S1r& " S2 S3\%\R                  5      r' " S4 S5\%\R                  5      r(\RR                  " 5        " S6 S7\R                  5      5       r*\+S8:X  a  \RX                  " 5         gg)9    N)support)socket_helper)threading_helper)BaseHTTPRequestHandler
HTTPServerc                   Z   ^  \ rS rSrSrSr/ r/ rSrU 4S jr	S r
S rS rS	 rS
 rSrU =r$ )BaseRobotTest    test_robotparserNc                    > [         TU ]  5         U R                  (       d  S U l        U R                  (       d  S U l        g g N)super__init_subclass__goodtest_good_urlsbadtest_bad_urls)cls	__class__s    >/opt/python-3.13.8/usr/lib/python3.13/test/test_robotparser.pyr   BaseRobotTest.__init_subclass__   s1    !#xx!%Cww $C     c                     [         R                  " U R                  5      R                  5       n[        R
                  R                  5       U l        U R                  R                  U5        g r   )	ioStringIO
robots_txt	readlinesurllibrobotparserRobotFileParserparserparse)selfliness     r   setUpBaseRobotTest.setUp   sE    DOO,668((88:% r   c                 V    [        U[        5      (       a  Uu  p!X!4$ U R                  U4$ r   )
isinstancetupleagentr$   urlr+   s      r   get_agent_and_urlBaseRobotTest.get_agent_and_url    s,    c5!!JE:zz3r   c                     U R                    HX  nU R                  U5      u  p!U R                  XS9   U R                  U R                  R                  X!5      5        S S S 5        MZ     g ! , (       d  f       Ml  = fN)r-   r+   )r   r.   subTest
assertTruer"   	can_fetchr,   s      r   r   BaseRobotTest.test_good_urls&   sZ    99C//4JE#3 5 5e AB 43 33   +A**
A9	c                     U R                    HX  nU R                  U5      u  p!U R                  XS9   U R                  U R                  R                  X!5      5        S S S 5        MZ     g ! , (       d  f       Ml  = fr1   )r   r.   r2   assertFalser"   r4   r,   s      r   r   BaseRobotTest.test_bad_urls,   s\    88C//4JE#3  !6!6u!BC 43 33r6   c                 l    U R                  U R                  R                  5       U R                  5        g r   )assertEqualr"   	site_mapsr$   s    r   test_site_mapsBaseRobotTest.test_site_maps2   s#    ..0$..Ar   )r"   )__name__
__module____qualname____firstlineno__r   r+   r   r   r<   r   r&   r.   r   r   r>   __static_attributes____classcell__)r   s   @r   r	   r	      sC    JED
CI%!
CDB Br   r	   c                   (    \ rS rSrSrSS/r/ SQrSrg)UserAgentWildcardTest6   zUser-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
    /
/test.html)/cyberworld/map/index.htmlz/tmp/xxx	/foo.html Nr@   rA   rB   rC   r   r   r   rD   rM   r   r   rG   rG   6   s    J D
ACr   rG   c                   &    \ rS rSrSr/ SQrS/rSrg)CrawlDelayAndCustomAgentTestA   z# robots.txt for http://www.example.com/

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
    )rI   rJ   )cybermapperrK   rK   rM   NrN   rM   r   r   rP   rP   A   s    J ND'
(Cr   rP   c                   .    \ rS rSrSrSS/rS/rSS/rSrg	)
SitemapTestR   a  # robots.txt for http://www.example.com/

User-agent: *
Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml
Sitemap: http://www.google.com/hostednews/sitemap_index.xml
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

    rI   rJ   rK   z7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlrM   N)	r@   rA   rB   rC   r   r   r   r<   rD   rM   r   r   rT   rT   R   s+    	J D'
(CJEGIr   rT   c                   $    \ rS rSrSr/ r/ SQrSrg)RejectAllRobotsTestc   z(# go away
User-agent: *
Disallow: /
    )rK   rI   /tmp/rM   NrN   rM   r   r   rW   rW   c   s    J
 D
6Cr   rW   c                   "    \ rS rSrSrSrS rSrg)BaseRequestRateTestm   Nc                    U R                   nU R                  U R                  -    GH  nU R                  U5      u  p2U R	                  X#S9   U R                  UR                  U5      U R                  5        UR                  U5      nU R                  X@R                  5        U R                  b  U R                  U[        R                  R                  5        U R                  UR                  U R                  R                  5        U R                  UR                  U R                  R                  5        S S S 5        GM      g ! , (       d  f       GM3  = fr1   )r"   r   r   r.   r2   r;   crawl_delayrequest_rateassertIsInstancer   r    RequestRaterequestsseconds)r$   r"   r-   r+   parsed_request_rates        r   test_request_rate%BaseRequestRateTest.test_request_rateq   s   99txx'C//4JE#3  !3!3E!:D<L<LM&,&9&9%&@#  !46G6GH$$0))+**66 $$+44))22 $$+33))11 43 (33s   C/E		
E	rM   )r@   rA   rB   rC   r_   r^   re   rD   rM   r   r   r[   r[   m   s    LKr   r[   c                       \ rS rSrSrS/rSrg)EmptyFileTest   r   z/foorM   N)r@   rA   rB   rC   r   r   rD   rM   r   r   rh   rh      s    J8Dr   rh   c                   f    \ rS rSrSrSr\R                  R                  SS5      r	Sr
S/r/ SQrS	rg
)CrawlDelayAndRequestRateTest   zUser-agent: figtree
Crawl-delay: 3
Request-rate: 9/30
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
    figtree	         )rm   rL   )/tmpz	/tmp.html/tmp/a.html/a%3cd.html/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.htmlrM   N)r@   rA   rB   rC   r   r+   r   r    ra   r_   r^   r   r   rD   rM   r   r   rk   rk      s<    J E%%11!R8LK$%D.Cr   rk   c                       \ rS rSrSrSrg)DifferentAgentTest   zFigTree Robot libwww-perl/5.04rM   Nr@   rA   rB   rC   r+   rD   rM   r   r   rv   rv      s    ,Er   rv   c                   *    \ rS rSrSrS/r/ SQrSrSrg)InvalidRequestRateTest   zUser-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
Crawl-delay: 3
Request-rate: 9/banana
    rq   )rY   rr   rs   rt   z	/a/b.htmlz/%7Ejoe/index.htmlrp   rM   N)	r@   rA   rB   rC   r   r   r   r^   rD   rM   r   r   rz   rz      s    J 8D!CKr   rz   c                   "    \ rS rSrSrS/r/ rSrg)InvalidCrawlDelayTest   z2User-Agent: *
Disallow: /.
Crawl-delay: pears
    rL   rM   NrN   rM   r   r   r}   r}      s    J
 =D
Cr   r}   c                   (    \ rS rSrSrSrS/rS/rSrg)AnotherInvalidRequestRateTest   zeUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
Request-rate: whale/banana
    	Googlebot/folder1/myfile.html/folder1/anotherfile.htmlrM   N	r@   rA   rB   rC   r   r+   r   r   rD   rM   r   r   r   r      s     J E"#D&
'Cr   r   c                   "    \ rS rSrSrSrS/rSrg)UserAgentOrderingTest   zMUser-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
    r   z/something.jpgrM   N)r@   rA   rB   rC   r   r+   r   rD   rM   r   r   r   r      s    J E
Cr   r   c                       \ rS rSrSrSrg)UserAgentGoogleMobileTest   zGooglebot-MobilerM   Nrx   rM   r   r   r   r      s    Er   r   c                   (    \ rS rSrSrSrS/rS/rSrg)GoogleURLOrderingTest   zJUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
    	googlebotr   r   rM   Nr   rM   r   r   r   r      s     J
 E"#D&
'Cr   r   c                   (    \ rS rSrSr/ SQr/ SQrSrg)DisallowQueryStringTest   zsUser-agent: *
Disallow: /some/path?name=value
Disallow: /another/path?
Disallow: /yet/one/path?name=value&more
    )
/some/pathz/some/path?z/some/path%3Fname=valuez/some/path?name%3Dvalue/another/pathz/another/path%3Fz/yet/one/path?name=value%26more)z#/some/path?name=value/another/path?z/another/path?name=valuez/yet/one/path?name=value&morerM   NrN   rM   r   r   r   r      s    J/D,Cr   r   c                   $    \ rS rSrSrS/rS/rSrg)UseFirstUserAgentWildcardTest   zNUser-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
    r   r   rM   NrN   rM   r   r   r   r      s    J D.Cr   r   c                      \ rS rSrSr/ SQr/ SQrS Ht  r\S\ S\" \5      S S	3-  r\R                  S
\ 35        \R                  S\" \5      S 35        \R                  S\ 35        \R                  S\" \5      S 35        Mv     Sr
g)PercentEncodingTesti  uy  User-agent: *
Disallow: /a1/Z-._~ # unreserved characters
Disallow: /a2/%5A%2D%2E%5F%7E # percent-encoded unreserved characters
Disallow: /u1/%F0%9F%90%8D # percent-encoded ASCII Unicode character
Disallow: /u2/%f0%9f%90%8d
Disallow: /u3/🐍 # raw non-ASCII Unicode character
Disallow: /v1/%F0 # percent-encoded non-ASCII octet
Disallow: /v2/%f0
Disallow: /v3/ # raw non-ASCII octet
Disallow: /p1%xy # raw percent
Disallow: /p2%
Disallow: /p3%25xy # percent-encoded percent
Disallow: /p4%2525xy # double percent-encoded percent
Disallow: /john%20smith # space
Disallow: /john doe
Disallow: /trailingspace%20
Disallow: /question%3Fq=v # not query
Disallow: /hash%23f # not fragment
Disallow: /dollar%24
Disallow: /asterisk%2A
Disallow: /sub/dir
Disallow: /slash%2F
Disallow: /query/question?q=%3F
Disallow: /query/raw/question?q=?
Disallow: /query/eq?q%3Dv
Disallow: /query/amp?q=v%26a
)z/u1/%F0z/u1/%f0z/u2/%F0z/u2/%f0z/u3/%F0z/u3/%f0z
/p1%2525xyz/p2%f0z
/p3%2525xyz/p4%xyz/p4%25xyz/question?q=vz/dollarz	/asteriskz/query/eq?q=vz/query/amp?q=v&a)9z	/a1/Z-._~z/a1/%5A%2D%2E%5F%7Ez	/a2/Z-._~z/a2/%5A%2D%2E%5F%7Ez/u1/%F0%9F%90%8Dz/u1/%f0%9f%90%8du   /u1/🐍z/u2/%F0%9F%90%8Dz/u2/%f0%9f%90%8du   /u2/🐍z/u3/%F0%9F%90%8Dz/u3/%f0%9f%90%8du   /u3/🐍z/v1/%F0z/v1/%f0u   /v1/u   /v1/🐍z/v2/%F0z/v2/%f0u   /v2/u   /v2/🐍z/v3/%F0z/v3/%f0u   /v3/u   /v3/🐍z/p1%xyz/p1%25xyz/p2%z/p2%25z/p2%2525z/p2%xyz/p3%xyz/p3%25xyz
/p4%2525xyz/john%20smithz/john smithz/john%20doez	/john doez/trailingspace%20z/trailingspace z/question%3Fq=vz/hash#fz	/hash%23fz/dollar$z
/dollar%24z
/asterisk*z/asterisk%2Az/sub/dirz
/sub%2Fdirz	/slash%2Fz/slash/z/query/question?q=?z/query/question?q=%3Fz/query/raw/question?q=?z/query/raw/question?q=%3Fz/query/eq?q%3Dvz/query/amp?q=v%26az:/[]@!$&'()*+,;=zDisallow: /rawz
Disallow: /pc%02X
z/rawz/raw%z/pcz/pc%rM   N)r@   rA   rB   rC   r   r   r   cordappendrD   rM   r   r   r   r     s    J8	DC8  qc)9#a&RHH


T!:

U3q6#,'(

S9

T#a&&'  r   r   c                   b    \ rS rSrSr\R                  R                  SS5      rSr	SS/r
S/rS	rg
)DefaultEntryTestiV  zOUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
    rp         rI   rJ   rK   rM   N)r@   rA   rB   rC   r   r   r    ra   r_   r^   r   r   rD   rM   r   r   r   r   V  s:    J %%11!R8LKD'
(Cr   r   c                   "    \ rS rSrSrSrS rSrg)StringFormattingTestic  zUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow: /some/path
    zxUser-agent: cybermapper
Disallow: /some/path

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/c                 b    U R                  [        U R                  5      U R                  5        g r   )r;   strr"   expected_outputr=   s    r   test_string_formatting+StringFormattingTest.test_string_formattingy  s!    T[[)4+?+?@r   rM   N)r@   rA   rB   rC   r   r   r   rD   rM   r   r   r   r   c  s    	JOAr   r   z&Socket server requires working socket.c                        \ rS rSrS rS rSrg)BaseLocalNetworkTestCasei}  c                 d   U R                  [        R                  R                  5        [	        [
        R                  S4U R                  5      U l        [        R                  " SU R                  R                  SS0S9U l        SU R                  l        U R                  R                  5         g )Nr   zHTTPServer servingpoll_intervalg{Gz?)nametargetkwargsT)
addCleanupr   request
urlcleanupr   r   HOSTRobotHandlerserver	threadingThreadserve_forevertdaemonstartr=   s    r   r&   BaseLocalNetworkTestCase.setUp  s}    112 -"4"4a!8$:K:KL!!%;;,, $D)+ r   c                     U R                   R                  5         U R                  R                  5         U R                   R	                  5         g r   )r   shutdownr   joinserver_closer=   s    r   tearDown!BaseLocalNetworkTestCase.tearDown  s/      "r   )r   r   N)r@   rA   rB   rC   r&   r   rD   rM   r   r   r   r   }  s     #r   r   s`   User-agent: test_robotparser
Disallow: /utf8/🐍
Disallow: /non-utf8/
Disallow: //[spam]/path
c                   N    \ rS rSr " S S\5      r\R                  S 5       rSr	g)LocalNetworkTestCasei  c                        \ rS rSrS rS rSrg)!LocalNetworkTestCase.RobotHandleri  c                     U R                  S5        U R                  5         U R                  R                  [        5        g )N   )send_responseend_headerswfilewriteSAMPLE_ROBOTS_TXTr=   s    r   do_GET(LocalNetworkTestCase.RobotHandler.do_GET  s/    s#JJ./r   c                     g r   rM   r$   formatargss      r   log_message-LocalNetworkTestCase.RobotHandler.log_message      r   rM   Nr@   rA   rB   rC   r   r   rD   rM   r   r   r   r     s    	0
	r   r   c                 ^   U R                   R                  nS[        R                   SUS    3nUS-   n[        R
                  R                  5       nUR                  U5        UR                  5         SnU R                  UR                  XS5      5        U R                  UR                  XRS-   5      5        U R                  UR                  XRS-   5      5        U R                  UR                  XRS-   5      5        U R                  UR                  XRS-   5      5        U R                  UR                  XRS	-   5      5        U R                  UR                  XRS
-   5      5        U R                  UR                  XRS-   5      5        U R                  UR                  XRS-   5      5        g )Nhttp://:r   /robots.txtr   z/utf8/u
   /utf8/🐍z/utf8/%F0%9F%90%8Dz
/non-utf8/z/non-utf8/%F0u   /non-utf8/🐍z/%2F[spam]/path)r   server_addressr   r   r   r    r!   set_urlreadr3   r4   r8   )r$   addrr-   
robots_urlr"   r+   s         r   testReadLocalNetworkTestCase.testRead  so    {{))**+1T!WI6=(
##335z""((;<((h?@))%7I1IJK))%7K1KLM))%7I1IJK((l0BCD))%1FGH))%7M1MNO))%7H1HIJr   rM   N)
r@   rA   rB   rC   r   r   r   reap_threadsr   rD   rM   r   r   r   r     s+    -  ""K #Kr   r   c                   N    \ rS rSr " S S\5      r\R                  S 5       rSr	g)PasswordProtectedSiteTestCasei  c                        \ rS rSrS rS rSrg)*PasswordProtectedSiteTestCase.RobotHandleri  c                 (    U R                  SS5        g )Ni  zForbidden access)
send_errorr=   s    r   r   1PasswordProtectedSiteTestCase.RobotHandler.do_GET  s    OOC!34r   c                     g r   rM   r   s      r   r   6PasswordProtectedSiteTestCase.RobotHandler.log_message  r   r   rM   Nr   rM   r   r   r   r     s    	5	r   r   c                 D   U R                   R                  nS[        R                  -   S-   [	        US   5      -   nUS-   n[
        R                  R                  5       nUR                  U5        UR                  5         U R                  UR                  SU5      5        g )Nr   r   r   r   *)r   r   r   r   r   r   r    r!   r   r   r8   r4   )r$   r   r-   r   r"   s        r   testPasswordProtectedSite7PasswordProtectedSiteTestCase.testPasswordProtectedSite  s    {{))-,,,s2Sa\A=(
##335s))#z:;r   rM   N)
r@   rA   rB   rC   r   r   r   r   r   rD   rM   r   r   r   r     s)    -  ""< #<r   r   c                   b    \ rS rSrSrSR                  \5      r\S 5       rS r	S r
S rS rS	rg
)NetworkTestCasei  zhttp://www.pythontest.net/z{}elsewhere/robots.txtc                 4   [         R                  " S5        [        R                  " U R                  5         [
        R                  R                  U R                  5      U l	        U R                  R                  5         S S S 5        g ! , (       d  f       g = f)Nnetwork)r   requiresr   transient_internetbase_urlr   r    r!   r   r"   r   )r   s    r   
setUpClassNetworkTestCase.setUpClass  sX    #--cll;++;;CNNKCJJJOO <;;s   A	B		
Bc                     SR                  U R                  U[        R                  R	                  U5      S   (       d  S5      $ S5      $ )Nz{}{}{}r   rI   r   )r   r   ospathsplitext)r$   r   s     r   r-   NetworkTestCase.url  sA    MM4BGG,<,<T,B1,E
 	
KM
 	
r   c                    U R                  U R                  R                  5        U R                  U R                  R                  5        U R	                  U R                  R                  5       S5        U R                  U R                  R                  S5      5        U R                  U R                  R                  S5      5        g )Nr   r   )r8   r"   disallow_all	allow_allassertGreatermtimer^   r_   r=   s    r   
test_basicNetworkTestCase.test_basic  s    112../4;;,,.2005611#67r   c                    U R                  U R                  R                  SU R                  S5      5      5        U R	                  U R                  R                  SU R
                  5      5        U R	                  U R                  R                  SU R                  S5      5      5        U R	                  U R                  R                  SU R                  S5      5      5        U R	                  U R                  R                  SU R                  S5      5      5        U R                  U R                  R                  SU R
                  5      5        g )Nr   	elsewhereNutchbrianwebstats)r3   r"   r4   r-   r8   r   r=   s    r   test_can_fetchNetworkTestCase.test_can_fetch  s    --c488K3HIJ..wFG..w8IJK..w8LMN..sDHHZ4HIJ--c4==ABr   c                    [         R                  R                  U R                  S5      5      nUR	                  5         U R                  UR                  5        U R                  UR                  5        U R                  UR                  5       S5        U R                  UR                  S5      5        U R                  UR                  S5      5        g )Nzi-robot.txtr   r   )r   r    r!   r-   r   r3   r  r8   r  r;   r  assertIsNoner^   r_   )r$   r"   s     r   test_read_404NetworkTestCase.test_read_404  s    ##33DHH]4KL((),,-+&,,S12&--c23r   rM   N)r@   rA   rB   rC   r   r   r   classmethodr   r-   r  r  r  rD   rM   r   r   r   r     s@     ,H)00:J 

8C4r   r   __main__)-r   r   r   unittesturllib.robotparserr   testr   test.supportr   r   http.serverr   r   r	   TestCaserG   rP   rT   rW   r[   rh   rk   rv   rz   r}   r   r   r   r   r   r   r   r   r   
skipUnlesshas_socket_supportr   r   r   r   requires_working_socketr   r@   mainrM   r   r   <module>r     sG   	 	     & ) :'B 'BTBM8+<+< B)=(2C2C )"G-!2!2 G"7-):): 7- 8'):): 
.#68I8I .$-5 -]H,=,=  	M8+<+< 	
(M83D3D 
(M8+<+<  5 
(M8+<+< 
(,mX->-> ,"
M83D3D 
H(-):): H(V
)*H,=,= 
)A=(*;*; A4 
,# #	#0 K3X5F5F KB<$<h>O>O <( 	  "'4h'' '4 #'4R ZMMO r   