o
    HXîh‰+  ã                   @   s(  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dlm
Z
 d dlmZmZ G dd„ dƒZG dd	„ d	eejƒZG d
d„ deejƒZG dd„ deejƒZG dd„ deejƒZG dd„ deƒZG dd„ deejƒZG dd„ deejƒZG dd„ deƒZG dd„ deejƒZG dd„ deejƒZG dd„ deejƒZG dd„ deejƒZG d d!„ d!eƒZG d"d#„ d#eejƒZG d$d%„ d%eejƒZG d&d'„ d'eejƒZG d(d)„ d)eejƒZ G d*d+„ d+eejƒZ!G d,d-„ d-eejƒZ"G d.d/„ d/eƒZ#G d0d1„ d1ejƒZ$G d2d3„ d3ejƒZ%e&d4kre '¡  dS dS )5é    N)Úsupport)Úsocket_helper)Úthreading_helper)ÚBaseHTTPRequestHandlerÚ
HTTPServerc                   @   sH   e Zd ZdZdZg Zg ZdZdd„ Zdd„ Z	dd	„ Z
d
d„ Zdd„ ZdS )ÚBaseRobotTestÚ Ztest_robotparserNc                 C   s,   t  | j¡ ¡ }tj ¡ | _| j |¡ d S ©N)	ÚioÚStringIOÚ
robots_txtÚ	readlinesÚurllibÚrobotparserÚRobotFileParserÚparserÚparse)ÚselfÚlines© r   ú?/opt/python-3.10.19/usr/lib/python3.10/test/test_robotparser.pyÚsetUp   s   zBaseRobotTest.setUpc                 C   s$   t |tƒr|\}}||fS | j|fS r	   )Ú
isinstanceÚtupleÚagent©r   Úurlr   r   r   r   Úget_agent_and_url   s   

zBaseRobotTest.get_agent_and_urlc              	   C   ó`   | j D ]*}|  |¡\}}| j||d |  | j ||¡¡ W d   ƒ n1 s(w   Y  qd S ©N)r   r   )Úgoodr   ÚsubTestÚ
assertTruer   Ú	can_fetchr   r   r   r   Útest_good_urls   ó   
ÿ€þzBaseRobotTest.test_good_urlsc              	   C   r   r   )Úbadr   r!   ÚassertFalser   r#   r   r   r   r   Útest_bad_urls$   r%   zBaseRobotTest.test_bad_urlsc                 C   s   |   | j ¡ | j¡ d S r	   )ÚassertEqualr   Ú	site_maps©r   r   r   r   Útest_site_maps*   ó   zBaseRobotTest.test_site_maps)Ú__name__Ú
__module__Ú__qualname__r   r   r    r&   r*   r   r   r$   r(   r,   r   r   r   r   r      s    r   c                   @   s    e Zd ZdZddgZg d¢ZdS )ÚUserAgentWildcardTestz•User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
    ú/ú
/test.html)ú/cyberworld/map/index.htmlz/tmp/xxxú	/foo.htmlN©r.   r/   r0   r   r    r&   r   r   r   r   r1   .   s    r1   c                   @   s   e Zd ZdZg d¢ZdgZdS )ÚCrawlDelayAndCustomAgentTestzå# robots.txt for http://www.example.com/

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
    )r2   r3   )Zcybermapperr4   r4   Nr6   r   r   r   r   r7   9   s    
r7   c                   @   s&   e Zd ZdZddgZdgZddgZdS )ÚSitemapTesta  # robots.txt for http://www.example.com/

User-agent: *
Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml
Sitemap: http://www.google.com/hostednews/sitemap_index.xml
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

    r2   r3   r4   z7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlN)r.   r/   r0   r   r    r&   r*   r   r   r   r   r8   J   s    
ÿr8   c                   @   s   e Zd ZdZg Zg d¢ZdS )ÚRejectAllRobotsTestz(# go away
User-agent: *
Disallow: /
    )r4   r2   ú/tmp/Nr6   r   r   r   r   r9   [   s    r9   c                   @   s   e Zd ZdZdZdd„ ZdS )ÚBaseRequestRateTestNc              	   C   sÂ   | j }| j| j D ]U}|  |¡\}}| j||d= |  | |¡| j¡ | |¡}|  || j¡ | jd urO|  |t	j
j¡ |  |j| jj¡ |  |j| jj¡ W d   ƒ n1 sYw   Y  q	d S r   )r   r    r&   r   r!   r)   Úcrawl_delayÚrequest_rateZassertIsInstancer   r   ÚRequestRateZrequestsZseconds)r   r   r   r   Zparsed_request_rater   r   r   Útest_request_ratei   s0   

þþþ€ò€þz%BaseRequestRateTest.test_request_rate)r.   r/   r0   r=   r<   r?   r   r   r   r   r;   e   s    r;   c                   @   s   e Zd ZdZdgZdS )ÚEmptyFileTestr   z/fooN)r.   r/   r0   r   r    r   r   r   r   r@      s    
r@   c                   @   s4   e Zd ZdZdZej dd¡ZdZ	dgZ
g d¢ZdS )	ÚCrawlDelayAndRequestRateTestz’User-agent: figtree
Crawl-delay: 3
Request-rate: 9/30
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
    Úfigtreeé	   é   é   )rB   r5   )ú/tmpz	/tmp.htmlú/tmp/a.htmlú/a%3cd.htmlú/a%3Cd.htmlz/a%2fb.htmlz/~joe/index.htmlN)r.   r/   r0   r   r   r   r   r>   r=   r<   r    r&   r   r   r   r   rA   †   s    	rA   c                   @   ó   e Zd ZdZdS )ÚDifferentAgentTestzFigTree Robot libwww-perl/5.04N©r.   r/   r0   r   r   r   r   r   rK   ˜   ó    rK   c                   @   s"   e Zd ZdZdgZg d¢ZdZdS )ÚInvalidRequestRateTestzUser-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
Crawl-delay: 3
Request-rate: 9/banana
    rF   )r:   rG   rH   rI   z	/a/b.htmlz/%7Ejoe/index.htmlrE   N)r.   r/   r0   r   r    r&   r<   r   r   r   r   rN   œ   s
    	rN   c                   @   s   e Zd ZdZdgZg ZdS )ÚInvalidCrawlDelayTestz2User-Agent: *
Disallow: /.
Crawl-delay: pears
    r5   Nr6   r   r   r   r   rO   ¬   s    rO   c                   @   ó    e Zd ZdZdZdgZdgZdS )ÚAnotherInvalidRequestRateTestzeUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
Request-rate: whale/banana
    Ú	Googlebotú/folder1/myfile.htmlú/folder1/anotherfile.htmlN©r.   r/   r0   r   r   r    r&   r   r   r   r   rQ   ¸   s
    
rQ   c                   @   s   e Zd ZdZdZdgZdS )ÚUserAgentOrderingTestzMUser-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
    rR   z/something.jpgN)r.   r/   r0   r   r   r&   r   r   r   r   rV   Å   s    
rV   c                   @   rJ   )ÚUserAgentGoogleMobileTestzGooglebot-MobileNrL   r   r   r   r   rW   Ô   rM   rW   c                   @   rP   )ÚGoogleURLOrderingTestzJUser-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
    Z	googlebotrS   rT   NrU   r   r   r   r   rX   Ø   s
    
rX   c                   @   ó   e Zd ZdZdgZdgZdS )ÚDisallowQueryStringTestz2User-agent: *
Disallow: /some/path?name=value
    ú
/some/pathz/some/path?name=valueNr6   r   r   r   r   rZ   å   s    
rZ   c                   @   rY   )ÚUseFirstUserAgentWildcardTestzNUser-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
    z/another/pathr[   Nr6   r   r   r   r   r\   ï   s    
r\   c                   @   rY   )ÚEmptyQueryStringTestz>User-agent: *
Allow: /some/path?
Disallow: /another/path?
    z/some/path?z/another/path?Nr6   r   r   r   r   r]   ü   s    
r]   c                   @   s0   e Zd ZdZej dd¡ZdZddgZ	dgZ
dS )	ÚDefaultEntryTestzOUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/
    rE   é   é   r2   r3   r4   N)r.   r/   r0   r   r   r   r>   r=   r<   r    r&   r   r   r   r   r^     s    
r^   c                   @   s   e Zd ZdZdZdd„ ZdS )ÚStringFormattingTestzÆUser-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow: /some/path
    zxUser-agent: cybermapper
Disallow: /some/path

User-agent: *
Crawl-delay: 1
Request-rate: 3/15
Disallow: /cyberworld/map/c                 C   s   |   t| jƒ| j¡ d S r	   )r)   Ústrr   Úexpected_outputr+   r   r   r   Útest_string_formatting*  r-   z+StringFormattingTest.test_string_formattingN)r.   r/   r0   r   rc   rd   r   r   r   r   ra     s    
ra   c                   @   s   e Zd Zdd„ Zdd„ ZdS )ÚRobotHandlerc                 C   s   |   dd¡ d S )Ni“  zForbidden access)Z
send_errorr+   r   r   r   Údo_GET0  s   zRobotHandler.do_GETc                 G   s   d S r	   r   )r   ÚformatÚargsr   r   r   Úlog_message3  s   zRobotHandler.log_messageN)r.   r/   r0   rf   ri   r   r   r   r   re   .  s    re   c                   @   s*   e Zd Zdd„ Zdd„ Zejdd„ ƒZdS )ÚPasswordProtectedSiteTestCasec                 C   sP   |   tjj¡ ttjdftƒ| _t	j
d| jjddid| _d| j_| j ¡  d S )Nr   zHTTPServer servingZpoll_intervalg{®Gáz„?)ÚnameÚtargetÚkwargsT)Z
addCleanupr   ZrequestÚ
urlcleanupr   r   ÚHOSTre   ÚserverÚ	threadingÚThreadZserve_foreverÚtÚdaemonÚstartr+   r   r   r   r   9  s   úz#PasswordProtectedSiteTestCase.setUpc                 C   s"   | j  ¡  | j ¡  | j  ¡  d S r	   )rp   Úshutdownrs   ÚjoinZserver_closer+   r   r   r   ÚtearDownI  s   

z&PasswordProtectedSiteTestCase.tearDownc                 C   s\   | j j}dtj d t|d ƒ }|d }tj ¡ }| |¡ | 	¡  |  
| d|¡¡ d S )Nzhttp://ú:r`   z/robots.txtÚ*)rp   Zserver_addressr   ro   rb   r   r   r   Zset_urlÚreadr'   r#   )r   Úaddrr   Z
robots_urlr   r   r   r   ÚtestPasswordProtectedSiteN  s   

z7PasswordProtectedSiteTestCase.testPasswordProtectedSiteN)r.   r/   r0   r   rx   r   Zreap_threadsr}   r   r   r   r   rj   7  s
    rj   c                   @   sF   e Zd ZdZd e¡Zedd„ ƒZdd„ Zdd„ Z	d	d
„ Z
dd„ ZdS )ÚNetworkTestCasezhttp://www.pythontest.net/z{}elsewhere/robots.txtc                 C   sV   t  d¡ t | j¡ tj | j¡| _	| j	 
¡  W d   ƒ d S 1 s$w   Y  d S )NÚnetwork)r   Zrequiresr   Ztransient_internetÚbase_urlr   r   r   r   r   r{   )Úclsr   r   r   Ú
setUpClass^  s
   
"þzNetworkTestCase.setUpClassc                 C   s&   d  | j|tj |¡d sd¡S d¡S )Nz{}{}{}r`   r2   r   )rg   r€   ÚosÚpathÚsplitext)r   r„   r   r   r   r   e  s
   ÿÿzNetworkTestCase.urlc                 C   sV   |   | jj¡ |   | jj¡ |  | j ¡ d¡ |   | j d¡¡ |   | j d¡¡ d S )Nr   rz   )r'   r   Údisallow_allÚ	allow_allZassertGreaterÚmtimer<   r=   r+   r   r   r   Ú
test_basicj  s
   zNetworkTestCase.test_basicc                 C   s˜   |   | j d|  d¡¡¡ |  | j d| j¡¡ |  | j d|  d¡¡¡ |  | j d|  d¡¡¡ |  | j d|  d¡¡¡ |   | j d| j¡¡ d S )Nrz   Z	elsewhereZNutchZbrianZwebstats)r"   r   r#   r   r'   r€   r+   r   r   r   Útest_can_fetchq  s   zNetworkTestCase.test_can_fetchc                 C   sf   t j |  d¡¡}| ¡  |  |j¡ |  |j¡ |  	| 
¡ d¡ |  | d¡¡ |  | d¡¡ d S )Nzi-robot.txtr   rz   )r   r   r   r   r{   r"   r‡   r'   r†   r)   rˆ   ZassertIsNoner<   r=   )r   r   r   r   r   Útest_read_404y  s   zNetworkTestCase.test_read_404N)r.   r/   r0   r€   rg   r   Úclassmethodr‚   r   r‰   rŠ   r‹   r   r   r   r   r~   Y  s    

r~   Ú__main__)(r
   rƒ   rq   ZunittestZurllib.robotparserr   Útestr   Ztest.supportr   r   Zhttp.serverr   r   r   ZTestCaser1   r7   r8   r9   r;   r@   rA   rK   rN   rO   rQ   rV   rW   rX   rZ   r\   r]   r^   ra   re   rj   r~   r.   Úmainr   r   r   r   Ú<module>   sF    "

	"
)ÿ