a
    Rh                     @   s   d Z ddlZddlZddlZddlmZ G dd dejjZ	G dd de	Z
G dd	 d	e	ZG d
d de	ZG dd dejZG dd deZG dd deZedkre  dS )zTests for HTMLParser.py.    N)supportc                   @   sr   e Zd ZddddZdd Zdd Zd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd ZdS )EventCollectorF	autocdatac                O   sD   || _ g | _| jj| _tjjj| g|R i | |r@| d d S )NF)r   eventsappendhtmlparser
HTMLParser__init___set_support_cdata)selfr   argskw r   </opt/python-3.9.24/usr/lib/python3.9/test/test_htmlparser.pyr      s    
zEventCollector.__init__c                 C   sj   g }d }| j D ]P}|d }||  kr.dkrPn nd|d d |d  f|d< n
|| |}q|| _ |S )Nr   data   )r   r   )r   LZprevtypeeventtyper   r   r   
get_events   s    

zEventCollector.get_eventsc                 C   s,   |  d||f | jr(|dkr(| d d S )NstarttagsvgTr   r   r   r   tagattrsr   r   r   handle_starttag%   s    zEventCollector.handle_starttagc                 C   s   |  d||f d S )Nstartendtagr   r   r   r   r   handle_startendtag*   s    z!EventCollector.handle_startendtagc                 C   s*   |  d|f | jr&|dkr&| d d S )Nendtagr   Fr   )r   r   r   r   r   handle_endtag-   s    zEventCollector.handle_endtagc                 C   s   |  d|f d S )Ncommentr!   r   r   r   r   r   handle_comment4   s    zEventCollector.handle_commentc                 C   s   |  d|f d S )Ncharrefr!   r&   r   r   r   handle_charref7   s    zEventCollector.handle_charrefc                 C   s   |  d|f d S )Nr   r!   r&   r   r   r   handle_data:   s    zEventCollector.handle_datac                 C   s   |  d|f d S )Ndeclr!   r&   r   r   r   handle_decl=   s    zEventCollector.handle_declc                 C   s   |  d|f d S )N	entityrefr!   r&   r   r   r   handle_entityref@   s    zEventCollector.handle_entityrefc                 C   s   |  d|f d S )Npir!   r&   r   r   r   	handle_piC   s    zEventCollector.handle_pic                 C   s   |  d|f d S )Nunknown declr!   )r   r+   r   r   r   unknown_declF   s    zEventCollector.unknown_declN)__name__
__module____qualname__r   r   r   r"   r$   r'   r)   r*   r,   r.   r0   r2   r   r   r   r   r   
   s   r   c                   @   s   e Zd Zdd ZdS )EventCollectorExtrac                 C   s$   t | || | d|  f d S )Nstarttag_text)r   r   r   Zget_starttag_textr   r   r   r   r   L   s    z#EventCollectorExtra.handle_starttagN)r3   r4   r5   r   r   r   r   r   r6   J   s   r6   c                   @   s   e Zd Zdd Zdd ZdS )EventCollectorCharrefsc                 C   s   |  d d S Nz6This should never be called with convert_charrefs=Truefailr&   r   r   r   r)   S   s    z%EventCollectorCharrefs.handle_charrefc                 C   s   |  d d S r9   r:   r&   r   r   r   r.   V   s    z'EventCollectorCharrefs.handle_entityrefN)r3   r4   r5   r)   r.   r   r   r   r   r8   Q   s   r8   c                   @   s   e Zd Zdd ZdS )EventCollectorNoNormalizec                 C   s   | j S N)r   r   r   r   r   r   ]   s    z$EventCollectorNoNormalize.get_eventsN)r3   r4   r5   r   r   r   r   r   r<   \   s   r<   c                   @   s&   e Zd Zdd ZdddZdd ZdS )	TestCaseBasec                 C   s
   t ddS NFconvert_charrefs)r   r>   r   r   r   get_collectorc   s    zTestCaseBase.get_collectorNc                 C   sr   |d u r|   }|}|D ]}|| q|  | }||krn| dt| d t| d t|  d S )Nz6received events did not match expected events
Source:
z
Expected:
z
Received:
)rC   feedcloser   r;   reprpprintZpformat)r   sourceZexpected_events	collectorr	   sr   r   r   r   
_run_checkf   s&    zTestCaseBase._run_checkc                 C   s   |  ||tdd d S r@   )rK   r6   )r   rH   r   r   r   r   _run_check_extrat   s    zTestCaseBase._run_check_extra)N)r3   r4   r5   rC   rK   rL   r   r   r   r   r?   a   s   
r?   c                   @   s6  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zedg ddd Zedg ddd Zedg d d!d" Zedg d#d$d% Zed&g d'd(d) Zed&g d*d+d, Zed&g d-d.d/ Zed&g d0d1d2 Zed3g d4d5d6 Zed3g d7d8d9 Zd:d; Zd<d= Zd>d? Zd@dA ZdBdC ZdDdE Z dFdG Z!dHdI Z"dJdK Z#dLdM Z$dNdO Z%dPdQ Z&dRdS Z'dTdU Z(dVdW Z)edg dXdYdZ Z*d[d\ Z+d]d^ Z,edg d_d`da Z-dbdc Z.ddde Z/e0dfdgdh Z1diS )jHTMLParserTestCasec                 C   s    |  ddg |  ddg d S )Nz<?processing instruction>)r/   zprocessing instructionz<?processing instruction ?>)r/   zprocessing instruction ?rK   r>   r   r   r    test_processing_instruction_only{   s    z3HTMLParserTestCase.test_processing_instruction_onlyc                 C   sB   |  ddddddg fddddddd	d
dgfdddddddg d S )Nz
<!DOCTYPE html PUBLIC 'foo'>
<HTML>&entity;&#32;
<!--comment1a
-></foo><bar>&lt;<?pi?></foo<bar
comment1b-->
<Img sRc='Bar' isMAP>sample
text
&#x201C;
<!--comment2a-- --comment2b-->
</Html>
r   
)r+   zDOCTYPE html PUBLIC 'foo'r   r   )r-   entity)r(   Z32)r%   z4comment1a
-></foo><bar>&lt;<?pi?></foo<bar
comment1bimg)srcZBar)ZismapN)r   zsample
text
)r(   Zx201C)r%   zcomment2a-- --comment2br#   r   rN   r>   r   r   r   test_simple_html   s(    z#HTMLParserTestCase.test_simple_htmlc                 C   s6   |  dddg fddg |  dgddg fddg d S )	Nz<p>&#bad;</p>r   p)r   z&#bad;r#   rW   z<div>&#bad;</div>divr#   rY   rN   r>   r   r   r   test_malformatted_charref   s    z,HTMLParserTestCase.test_malformatted_charrefc                 C   s   |  dddg d S )Nz&entityref foo)r-   r-   )r   z foorN   r>   r   r   r   test_unclosed_entityref   s    z*HTMLParserTestCase.test_unclosed_entityrefc                 C   s$   |  dddg fddg fddg d S )Nz<a><b></a></b>r   abr#   r]   r#   r^   rN   r>   r   r   r   test_bad_nesting   s    z#HTMLParserTestCase.test_bad_nestingc                 C   s   |  ddg d S )N#this text & contains & ampersands &)r   rb   rN   r>   r   r   r   test_bare_ampersands   s    z'HTMLParserTestCase.test_bare_ampersandsc                 C   s   |  ddg d S )N.this < text > contains < bare>pointy< brackets)r   rd   rN   r>   r   r   r   test_bare_pointy_brackets   s    z,HTMLParserTestCase.test_bare_pointy_bracketsc                 C   s0   |  ddddgfg |  ddddgfg d S )N	<a b='<'>r   r]   r^   <	<a b='>'>r^   >rN   r>   r   r   r   test_starttag_end_boundary   s    z-HTMLParserTestCase.test_starttag_end_boundaryc                 C   s  dddgfg}|  dg| |  ddg| |  ddg| |  d	d
g| |  ddg| |  ddg| dddgfg}|  dg| |  ddg| |  ddg| |  d	dg| |  ddg| |  ddg| dg}|  ddg| |  ddg| |  ddg| |  ddg| |  dd g| |  d!d"g| |  d#d$g| |  d%d&g| |  d'd(g| |  d)dg| |  ddg| d S )*Nr   r]   rg   rf   <a zb='<'>z<a bz='<'>z<a b=z'<'>z<a b='<z'>z<a b='<'rk   rj   ri   zb='>'>z='>'>z'>'>z<a b='>z<a b='>')r%   abc z
<!--abc-->rh   z	!--abc--><!z--abc--><!-z-abc--><!--zabc-->z<!--azbc-->z<!--abzc-->z<!--abcz-->z<!--abc-z->z	<!--abc--rN   r   outputr   r   r   test_buffer_artefacts   s4    z(HTMLParserTestCase.test_buffer_artefactsc                 C   s0   g d}|D ]}|  d| dd| fg qd S )N)
ZHTMLzOHTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"z[HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"zbhtml PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"zfhtml PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"zYmath PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"zhtml PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"zWsvg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"z'html PUBLIC "-//IETF//DTD HTML 2.0//EN"z,html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"z<!DOCTYPE %s>r+   DOCTYPE rN   )r   ZdtdsZdtdr   r   r   test_valid_doctypes   s
    
z&HTMLParserTestCase.test_valid_doctypesc                 C   sN   |  dddg fg |  dddg fdg |  dddg fddd	gfdg d S )
Nz<p/>r    rW   z<p></p>r   rX   z<p><img src='foo' /></p>rS   )rT   foorN   r>   r   r   r   test_startendtag  s    
z#HTMLParserTestCase.test_startendtagc                 C   s&   d}|  |ddddgfd|fg d S )Nz <foo:bar   
   one="1"	two=2   >r   zfoo:bar)Zone1)Ztwo2r7   )rL   )r   rJ   r   r   r   test_get_starttag_text  s
    z)HTMLParserTestCase.test_get_starttag_textcontent)z*<!-- not a comment --> &not-an-entity-ref;<not a='start tag'>z<a href="" /> <p> <span></span>zfoo = "</scr" + "ipt>";zfoo = "</SCRIPT" + ">";zfoo = <
/script> z*<!-- document.write("</scr" + "ipt>"); -->z
//<![CDATA[
document.write('<s'+'cript type="text/javascript" src="http://www.example.org/r='+new Date().getTime()+'"><\/s'+'cript>');
//]]>z 
<!-- //
var foo = 3.14;
// -->
u   <!-- ☃ -->zfoo = "</ script>"zfoo = "</scripture>"zfoo = "</script>"u   foo = "</script >"u   foo = "</ſcript>"u   foo = "</scrıpt>"c                 C   s,   d| d}|  |ddg fd|fdg d S )Nz<script>z	</script>r   scriptr   r#   r   rN   r   r}   rJ   r   r   r   test_script_content  s
    z&HTMLParserTestCase.test_script_content)
z0a::before { content: "<!-- not a comment -->"; }z-a::before { content: "&not-an-entity-ref;"; }z-a::before { content: "<not a='start tag'>"; }u   a::before { content: "☃"; }z#a::before { content: "< /style>"; }z#a::before { content: "</ style>"; }z#a::before { content: "</styled>"; }z#a::before { content: "</style>"; }u$   a::before { content: "</style >"; }u#   a::before { content: "</ſtyle>"; }c                 C   s,   d| d}|  |ddg fd|fdg d S )Nz<style>z</style>r   styler   r#   r   rN   r   r   r   r   test_style_content4  s
    z%HTMLParserTestCase.test_style_content)<!-- not a comment -->r~   <![CDATA[not a cdata]]><!not a bogus comment></not a bogus comment>   ☃z	< /title>z	</ title>z	</titled>z	</title>u
   </title >u	   </tıtle>c                 C   s,   d| d}|  |ddg fd|fdg d S )Nz<title>z</title>r   titler   r#   r   rN   r   r}   rH   r   r   r   test_title_contentF  s    z%HTMLParserTestCase.test_title_content)r   r~   r   r   r   r   z< /textarea>z</ textarea>z</textareable>z</textarea>u   </textarea >c                 C   s,   d| d}|  |ddg fd|fdg d S )Nz
<textarea>z</textarea>r   textarear   r#   r   rN   r   r   r   r   test_textarea_content\  s    z(HTMLParserTestCase.test_textarea_contentr#   )r   ZSCRIPTzscript zscript
zscript/zscript foo=barzscript foo=">"c                 C   s@   d}d| d| d}| j |ddg fd|fdgtd	d
d d S )Nz<!-- not a comment --> &not-an-entity-ref;
                  <a href="" /> </p><p> <span></span></style>
                  '</script' + '>'<ScrIPt></rk   r   r   r   r   FrA   rI   rK   r<   r   r#   r}   rJ   r   r   r   test_script_closing_tagq  s    z*HTMLParserTestCase.test_script_closing_tag)r   ZSTYLEzstyle zstyle
zstyle/zstyle foo=barzstyle foo=">"c                 C   s@   d}d| d| d}| j |ddg fd|fdgtd	d
d d S )Nz
            b::before { content: "<!-- not a comment -->"; }
            p::before { content: "&not-an-entity-ref;"; }
            a::before { content: "<i>"; }
            a::after { content: "</i>"; }
            z<StyLE>r   rk   r   r   r   r   FrA   r   r   r   r   r   r   test_style_closing_tag  s    z)HTMLParserTestCase.test_style_closing_tag)r   ZTITLEztitle ztitle
ztitle/ztitle foo=barztitle foo=">"c                 C   sb   d}d| d| d}| j |ddg fddgtd	d
d | j |ddg fddddgtdd
d d S )N+<!-- not a comment --><i>Egg &amp; Spam</i>z<TitLe>r   rk   r   r   r   z'<!-- not a comment --><i>Egg & Spam</i>r   TrA   r   r   z<!-- not a comment --><i>Egg r-   Zampr   z	 Spam</i>Fr   r   r   r   r   test_title_closing_tag  s     z)HTMLParserTestCase.test_title_closing_tag)r   ZTEXTAREAz	textarea z	textarea
z	textarea/ztextarea foo=barztextarea foo=">"c                 C   sb   d}d| d| d}| j |ddg fddgtd	d
d | j |ddg fddddgtdd
d d S )Nr   z
<TexTarEa>r   rk   r   r   r   r   TrA   r   r   r   r   Fr   r   r   r   r   test_textarea_closing_tag  s     z,HTMLParserTestCase.test_textarea_closing_tagztail,end)ro   Frh   Fr   F)z</sF)z</scriptF)z	</script T)z</script foo=barT)z</script foo=">Tc                 C   sF   d}d| | }| j |ddg fd|r*|n|| fgtddd d S )	Nza = 123r   r   r   r   FrA   r   r   )r   tailendr}   rJ   r   r   r   test_eof_in_script  s    z%HTMLParserTestCase.test_eof_in_script)r   r   r   )z</tF)z</titleF)z</title T)z</title foo=barT)z</title foo=">Tc                 C   sr   d| }| j |ddg fdd|r$dn| fgtddd	 | j |ddg fd
ddd|rXdn| fgtddd	 d S )Nz<TitLe>Egg &amp; Spamr   r   r   z
Egg & Spamro   TrA   r   )r   zEgg r   z SpamFr   )r   r   r   rJ   r   r   r   test_eof_in_title  s    
z$HTMLParserTestCase.test_eof_in_titlec                 C   s   d}g d}|  || d S )Na.  <!-- I'm a valid comment --><!--me too!--><!------><!-----><!----><!---><!--><!----I have many hyphens----><!-- I have a > in the middle --><!-- and I have -- in the middle! --><!--incorrectly-closed-comment--!><!----!><!----!--><!---- >--><!---!>--><!--!>--><!-- <!-- nested --> --><!--<!--><!--<!--!>))r%   z I'm a valid comment )r%   zme too!r%   z--r%   -r%   ro   r   r   )r%   z--I have many hyphens--)r%   z I have a > in the middle )r%   z and I have -- in the middle! )r%   zincorrectly-closed-commentr   )r%   z--!)r%   z-- >r%   z-!>)r%   z!>)r%   z <!-- nested )r   z -->r%   rp   r   rN   r   r   expectedr   r   r   test_comments  s    z HTMLParserTestCase.test_commentsc                 C   s   d}g d}|  || d S )Nzs<!--[if IE & !(lte IE 8)]>aren't<![endif]--><!--[if IE 8]>condcoms<![endif]--><!--[if lte IE 7]>pretty?<![endif]-->))r%   z%[if IE & !(lte IE 8)]>aren't<![endif])r%   z[if IE 8]>condcoms<![endif])r%   z[if lte IE 7]>pretty?<![endif]rN   r   r   r   r   test_condcoms  s    z HTMLParserTestCase.test_condcomsc              
   C   sB  dd }|  | j g d}dddgfddg}|D ]}| jd	||| d
 q4dddg dfdddg}|D ]}| jd||| d
 qn|D ]X}d|gd }dddg fd|fddddg fd|fddg	}| jd|||| d
 qd}tdt|D ],}| j|d | d|d | fg| d
 q| jddg| d
 d S )Nc                   S   s   t  S r=   )r8   r   r   r   r   <lambda>      z:HTMLParserTestCase.test_convert_charrefs.<locals>.<lambda>)z&quot;z&#34;z&#x22;z&quotz&#34z&#x22r   r]   )hrefzfoo"zar)r   za"zr_   z<a href="foo{0}zar">a{0}z</a>r   )r   "))xr   )yz"X)zzX"z*{0}<a x="{0}" y="{0}X" z="X{0}">{0}</a>{0}X   r   r   r   r   r   z/{1}<script>{0}</script>{1}<style>{0}</style>{1}z&quo &# &#xr   no charrefs here)r   r   )Z
assertTruerB   rK   formatjoinrangelen)r   rI   Zcharrefsr   r(   textr   r   r   r   r   test_convert_charrefs
  sR    

z(HTMLParserTestCase.test_convert_charrefsc              
   C   s.   |  ddddgfdddddd	gfd
dg d S )NzF<html <html>te>>xt&a<<bc</a></html>
<img src="URL><//img></html</html>r   r   )z<htmlN)r   zte>>xt)r-   r]   r   rh   zbc<)r]   NrU   rP   rN   r>   r   r   r   test_tolerant_parsing3  s    

z(HTMLParserTestCase.test_tolerant_parsingc                 C   sx  |  ddg |  ddg |  ddg |  ddg |  d	g  |  d
dg |  ddg |  ddg |  dg  |  ddg |  ddg |  dddg fg |  ddg |  ddg |  dg  |  dg  |  dg  |  dg  |  dg  |  d dd!g fg |  d"dd#g fg |  d$d%d#g fg |  d&dd#g fg |  d'd%d#g fg |  d(d)g d S )*Nrh   r   <>)r   r   < >)r   r   < )r   r   z</><$>)r   r   z</$>)r%   $r   )r   r   z</az</ a>)r%   z az</ az<a<a>r   a<az</a<a>)r#   r   rp   r   z<az<a foo='bar'z<a foo='barz
<a foo='>'z	<a foo='>z<a$>za$z<a$b>a$bz<a$b/>r    z<a$b  >z<a$b  />z</a$b>)r#   r   rN   r>   r   r   r   test_starttag_junk_chars>  s2    z+HTMLParserTestCase.test_starttag_junk_charsc              
   C   s   |  ddddgfg d}ddg dfg}|  || d}ddg d	fd
dg d	fg}|  || d}d
dg fd
dg fd
dg fd
dg fddg fddg fddg fddg fg}|  || d S )Nz<a foo="var"/>r    r]   )rx   varzj<img width=902 height=250px src="/sites/default/files/images/homepage/foo.jpg" /*what am I doing here*/ />rS   ))widthZ902)ZheightZ250px)rT   z,/sites/default/files/images/homepage/foo.jpg)z*whatN)amN)iN)ZdoingN)zhere*Nz9<a / /foo/ / /=/ / /bar/ / /><a / /foo/ / /=/ / /bar/ / >))rx   N)=N)barNr   zD<meta><meta / ><meta // ><meta / / ><meta/><meta /><meta //><meta//>metarN   r   r   r   r   test_slashes_in_starttagY  s$    z+HTMLParserTestCase.test_slashes_in_starttagc                 C   s    |  ddg |  ddg d S )Nz</a/>r_   z</a foo="var"/>rN   r>   r   r   r   test_slashes_in_endtagx  s    z)HTMLParserTestCase.test_slashes_in_endtagc                 C   s   |  ddg d S )Nz<!DOCTYPE foo $ >)r+   zDOCTYPE foo $ rN   r>   r   r   r   test_declaration_junk_chars|  s    z.HTMLParserTestCase.test_declaration_junk_charsc                 C   s   |  ddg d S )Nz"<!spacer type="block" height="25">)r%   zspacer type="block" height="25"rN   r>   r   r   r   test_illegal_declarations  s    z,HTMLParserTestCase.test_illegal_declarationsc                 C   sZ   d}ddg fdddg fdddg fdddg fdddg fdddg fddg fg}|  || d S )Nzn<br></label</p><br></div end tmAd-leaderBoard><br></<h4><br></li class="unit"><br></li
						</ul><br></><br>r   br)r#   zlabel<rZ   )r%   z<h4)r#   ZlirN   r   r   r   r   test_invalid_end_tags  s    z(HTMLParserTestCase.test_invalid_end_tagsc                 C   s&   d}ddg fdddg}|  || d S )Nz(<b>This</b attr=">"> confuses the parserr   r^   )r   ZThisr`   )r   z confuses the parserrN   r   r   r   r   test_broken_invalid_end_tag  s    z.HTMLParserTestCase.test_broken_invalid_end_tagc                 C   s   d}dddgfddg fddddgfd	d
dddg fdddg fddddg}|  || d}ddg dfddg fddddgfd	d
g}|  || d S )Nz[<div style=""    ><b>The <a href="some_url">rain</a> <br /> in <span>Spain</span></b></div>r   rY   r   ro   r^   )r   zThe r]   )r   Zsome_url)r   Zrainr_   r    r    r   )r   z in span)r   ZSpainr#   r   r`   rZ   z><div style="", foo = "bar" ><b>The <a href="some_url">rain</a>)r   ,Nrx   r   rN   r   r   r   r   $test_correct_detection_of_start_tags  s4    


z7HTMLParserTestCase.test_correct_detection_of_start_tagsc                 C   sF   ddgfddgfdg dfddd	gfg}|D ]\}}|  || q,d S )
Na&)r   r   za&b)r   abza&b )r   r]   r-   r^   r   za&b;r   r   rN   r   r   r   r   r   r   r   test_EOF_in_charref  s    

z&HTMLParserTestCase.test_EOF_in_charrefc                 C   s   ddgfddgfddgfddgfddgfd	dgfd
dgfddgfddgfddgfddgfddgfddgfddgfg}|D ]\}}|  || qxd S )Nrr   r   z<!---z<!----z<!-----r   z<!------r   z<!----!z<!---!)r%   z-!z<!---!>r   z<!--foo)r%   rx   z<!--foo-z	<!--foo--z
<!--foo--!z<!--<!--r   z	<!--<!--!rN   r   r   r   r   test_eof_in_comments  s"    z'HTMLParserTestCase.test_eof_in_commentsc                 C   sr   ddgfddgfddgfddgfd	d
gfddgfddgfddgfddgfddgfg
}|D ]\}}|  || qXd S )Nrp   r   rq   r   z<![)r%   [z	<!DOCTYPE)r+   ZDOCTYPEz
<!DOCTYPE )r+   rv   z<!DOCTYPE html)r+   zDOCTYPE htmlz<!DOCTYPE html )r+   zDOCTYPE html z<!DOCTYPE html PUBLIC)r+   zDOCTYPE html PUBLICz<!DOCTYPE html PUBLIC "foo)r+   zDOCTYPE html PUBLIC "fooz6<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo)r+   z4DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foorN   r   r   r   r   test_eof_in_declarations  s    z+HTMLParserTestCase.test_eof_in_declarations)ro   r   zx]zx]]c                 C   sn   |  d| dd| fg | j d| dd| fgtddd |  d	| d
dg fd
ddgfdd| fg d S )Nz	<![CDATA[r1   CDATA[r%   z[CDATA[Tr   r   <svg><text y="100"><![CDATA[r   r   r   r   Z100rK   r   )r   r}   r   r   r   test_eof_in_cdata  s    




z$HTMLParserTestCase.test_eof_in_cdatac                 C   s   d}g d}|  || d S )Nz<!ELEMENT br EMPTY><! not really a comment ><! not a comment either --><! -- close enough --><!><!<-- this was an empty comment><!!! another bogus comment !!!>))r%   zELEMENT br EMPTY)r%   z not really a comment )r%   z not a comment either --)r%   z -- close enough --r   )r%   z<-- this was an empty comment)r%   z!! another bogus comment !!!rN   r   r   r   r   test_bogus_comments  s    	z&HTMLParserTestCase.test_bogus_commentsc                 C   sX   d}dddddddgfdd	dd
dgfddddg fdddddd
dgfdg}|  || d S )Nz<![if !(IE)]>broken condcom<![endif]><![if ! IE]><link href="favicon.tiff"/><![endif]><![if !IE 6]><img src="firefox.png" /><![endif]><![if !ie 6]><b>foo</b><![endif]><![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>)r1   zif !(IE))r   zbroken condcom)r1   Zendif)r1   zif ! IEr    link)r   zfavicon.tiff)r1   zif !IE 6rS   )rT   zfirefox.png)r1   zif !ie 6r   r^   )r   rx   r`   )r1   zif (!IE)|(lt IE 9))rT   zmammoth.bmprN   r   r   r   r   test_broken_condcoms  s(    


z'HTMLParserTestCase.test_broken_condcoms)zjust some plain textr   z&not-an-entity-ref;r~   ro   z[[I have many brackets]]zI have a > in the middlezI have a ]] in the middlez] ]>z]] >zN
    if (a < b && a > b) {
        printf("[<marquee>How?</marquee>]");
    }
c                 C   sV   d| d}ddg fdddgfdd| fd	d
g}|  || | j ||tddd d S )Nr   z]]></text></svg>r   r   r   r   r1   r   r#   r   r#   r   Tr   r   r   )r   r}   r   r   r   r   r   test_cdata_section_content9  s    

z-HTMLParserTestCase.test_cdata_section_contentc              	   C   sB   d}ddddg fdddgfdd	d
ddg	}| j ||tddd d S )Nzb<![CDATA[foo<br>bar]]><svg><text y="100"><![CDATA[foo<br>bar]]></text></svg><![CDATA[foo<br>bar]]>)r%   z[CDATA[foo<br)r   zbar]]>r   r   r   r   )r1   zCDATA[foo<br>barr   r   Tr   r   r   r   r   r   r   test_cdata_sectionW  s    
z%HTMLParserTestCase.test_cdata_sectionc                 C   s8   t dd}|d | | dddg fddd	g d S )
NTrA   zfoo <a>link</a> bar &amp; baz)r   zfoo r   r]   )r   r   r_   )r   z
 bar & baz)r   rD   ZassertEqualr   )r   r	   r   r   r   "test_convert_charrefs_dropped_texti  s    

z5HTMLParserTestCase.test_convert_charrefs_dropped_textZcpuc                 C   s   dd }d}|d|  |d|  |d|  |d|  |d|  |d	|  |d
|  |d|  |d|  |d|  d S )Nc                 S   s    t j }||  |  d S r=   )r   r	   r
   rD   rE   )rH   r	   r   r   r   checky  s    

zBHTMLParserTestCase.test_eof_no_quadratic_complexity.<locals>.checki rm   z<a a=z8</a </a </a </a </a </a </a </a </a </a </a </a </a </a zB</a a=</a a=</a a=</a a=</a a=</a a=</a a=</a a=</a a=</a a=</a a=z<!--<!--<!--<!--zx<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!z&<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?z-</$</$</$</$</$</$</$</$</$</$</$</$</$</$</$zQ<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[a;  <!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctyper   )r   r   nr   r   r    test_eof_no_quadratic_complexityu  s    z3HTMLParserTestCase.test_eof_no_quadratic_complexityN)2r3   r4   r5   rO   rV   r[   r\   ra   rc   re   rl   ru   rw   ry   r|   r   ZsubTestsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zrequires_resourcer   r   r   r   r   rM   y   sr    











-	)!
"
rM   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#S )$AttributesTestCasec                 C   s   ddg dfg}|  d| |  ddddgfg |  ddddgfg |  d	dddgfg |  d
dddgfg |  ddddgfg |  ddddgfg |  ddddgfg |  ddddgfg |  ddddgfg d S )Nr   r]   ))r^   v)cr   )dr   )eNz<a b='v' c="v" d=v e>z<a foo==bar>)rx   z=barz<a foo =bar>r   z<a foo	=bar>z<a foo=bar>)zfoor   u   <a foo =bar>)u   foo r   z<a foo= bar>z<a foo=	bar>z<a foo=bar>)rx   zbaru   <a foo= bar>)rx   u    barrN   rs   r   r   r   test_attr_syntax  s    z#AttributesTestCase.test_attr_syntaxc                 C   s   |  dddg dfg |  dddddgfg |  dddd	d
gfg |  dddddgfg |  ddddgfg |  ddddgfg d S )Nz'<a b='xxx
	xxx' c="yyy	
yyy" d='	xyz
'>r   r]   ))r^   zxxx
	xxx)r   zyyy	
yyy)r   z	xyz
z<a b='' c="">)r^   ro   )r   ro   z<a b=	x c=
y>)r^   r   )r   r   u   <a b= c= >)r^   )r       z<e a=rgb(1,2,3)>r   )r]   z
rgb(1,2,3)z<a href=mailto:xyz@example.com>)r   zmailto:xyz@example.comrN   r>   r   r   r   test_attr_values  s&    z#AttributesTestCase.test_attr_valuesc                 C   sL   |  dddddgfg |  ddddd	gfg |  d
dddd	gfg d S )Nu!   <img src=/foo/bar.png alt=中文>r   rS   )rT   z/foo/bar.png)Zaltu   中文u+   <a title='テスト' href='テスト.html'>r]   )r   u	   テスト)r   u   テスト.htmlu+   <a title="テスト" href="テスト.html">rN   r>   r   r   r   test_attr_nonascii  s$    z%AttributesTestCase.test_attr_nonasciic                 C   s   |  ddddgfg d S )Nz!<a b='&amp;&gt;&lt;&quot;&apos;'>r   r]   )r^   z&><"'rN   r>   r   r   r   test_attr_entity_replacement  s    z/AttributesTestCase.test_attr_entity_replacementc                 C   s   |  dddg dfg d S )Nz<a a.b='v' c:d=v e-f=v>r   r]   ))za.br   )zc:dr   )ze-fr   rN   r>   r   r   r   test_attr_funky_names  s    z(AttributesTestCase.test_attr_funky_namesc                 C   s   |  ddddgfg d S )Nz0<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>r   r   )rx   u   €&aa&unsupported;rN   r>   r   r   r   test_entityrefs_in_attributes  s    z0AttributesTestCase.test_entityrefs_in_attributesc                 C   s.   |  ddddgfdddgfdddgfg d S )	Nz<a $><b $=%><c \=/>r   r]   )r   Nr^   )r   %r   )\/rN   r>   r   r   r   test_attr_funky_names2  s    


z)AttributesTestCase.test_attr_funky_names2c                 C   s\   dD ]R}|  d| dddgfg |  d| dddgfg |  d| dddgfg qd S )N)&z&amp;z&#38;z&#x26;z<a href="%s">r   r]   )r   r  z<a href='%s'>z<a href=%s>rN   )r   rR   r   r   r    test_entities_in_attribute_value  s    


z3AttributesTestCase.test_entities_in_attribute_valuec                 C   sP   d}dddgfdddddgfddddd	gfd
ddddgfddg}|  || d S )Nz<a href=test'style='color:red;bad1'>test - bad1</a><a href=test'+style='color:red;ba2'>test - bad2</a><a href=test'&nbsp;style='color:red;bad3'>test - bad3</a><a href = test'&nbsp;style='color:red;bad4'  >test - bad4</a>r   r]   )r   ztest'style='color:red;bad1')r   ztest - bad1r_   )r   ztest'+style='color:red;ba2')r   ztest - bad2)r   u   test' style='color:red;bad3')r   ztest - bad3)r   u   test' style='color:red;bad4')r   ztest - bad4rN   r   r   r   r   test_malformed_attributes  s     



z,AttributesTestCase.test_malformed_attributesc                 C   sH   |  dddg fddddgfdg |  d	ddg fdddd
gfdg d S )Nz<x><y z=""o"" /></x>r   r   r    r   )r   ro   )zo""N)r#   r   z<x><y z="""" /></x>)z""NrN   r>   r   r   r   "test_malformed_adjacent_attributes  s    z5AttributesTestCase.test_malformed_adjacent_attributesc                 C   s4   |  dddddgfg |  dddddgfg d S )	Nz<a width="100%"cellspacing=0>r   r]   r   z100%Zcellspacing0z<a id="foo"class="bar">)idrx   classr   rN   r>   r   r   r   test_adjacent_attributes  s    z+AttributesTestCase.test_adjacent_attributesc                 C   s   |  ddddgfg d S )Nz<a v=>r   r]   )r   ro   rN   r>   r   r   r   test_missing_attribute_value  s    z/AttributesTestCase.test_missing_attribute_valuec                 C   s   |  ddddgfg d S )Nz-<a href=javascript:popup('/popup/help.html')>r   r]   )r   z$javascript:popup('/popup/help.html')rN   r>   r   r   r   test_javascript_attribute_value  s
    z2AttributesTestCase.test_javascript_attribute_valuec                 C   s   |  ddddgfddg d S )Nz-<a href='http://www.example.org/">;'>spam</a>r   r]   )r   zhttp://www.example.org/">;)r   Zspamr_   rN   r>   r   r   r   test_end_tag_in_attribute_value  s    z2AttributesTestCase.test_end_tag_in_attribute_valuec                 C   s   d}ddg fddddgfddg dfdd	g fdd
dgfdddgfddddgfdddgfdddddddgfdddgfddddg}|  || d S )Nz<html><body bgcolor=d0ca90 text='181008'><table cellspacing=0 cellpadding=1 width=100% ><tr><td align=left><font size=-1>- <a href=/rabota/><span class=en> software-and-i</span></a>- <a href='/1/'><span class=en> library</span></a></table>r   r   body)ZbgcolorZd0ca90)r   Z181008table)r  )Zcellpaddingrz   r  trZtd)ZalignleftZfont)sizez-1)r   z- r]   )r   z/rabota/r   )r  en)r   z software-and-ir   r_   )r   z/1/)r   z library)r#   r  rN   r   r   r   r   test_with_unquoted_attributes  s"    

z0AttributesTestCase.test_with_unquoted_attributesc                 C   s   d}dddgfddddgfddddgfddg d	fdddd
gfddddgfddddgfddddgfddddgfg	}|  || d S )Nz<div class=bar,baz=asd><div class="bar",baz="asd"><div class=bar, baz=asd,><div class="bar", baz="asd",><div class="bar",><div class=,bar baz=,asd><div class=,"bar" baz=,"asd"><div ,class=bar ,baz=asd><div class,="bar" baz,="asd">r   rY   )r  zbar,baz=asdr  )z,bazasd)r  zbar,)bazzasd,)r  r   )r"  r!  r   r   )r  z,bar)r"  z,asd)r  z,"bar")r"  z,"asd")z,classr   )zclass,r   )zbaz,r!  rN   r   r   r   r   test_comma_between_attributes3  s    
z0AttributesTestCase.test_comma_between_attributesc                 C   s   |  ddddgfg d S )Nz<form action=bogus|&#()value>r   Zform)actionzbogus|&#()valuerN   r>   r   r   r   -test_weird_chars_in_unquoted_attribute_valuesO  s
    z@AttributesTestCase.test_weird_chars_in_unquoted_attribute_valuesN)r3   r4   r5   r   r  r  r  r  r  r  r  r  r  r  r  r  r  r   r#  r%  r   r   r   r   r     s"   
	r   __main__)__doc__Zhtml.parserr   rG   Zunittesttestr   r	   r
   r   r6   r8   r<   ZTestCaser?   rM   r   r3   mainr   r   r   r   <module>   s(   @       L