
    5\h                     >   d Z ddlZddlZddlZddlmZ  G d dej        j                  Z	 G d de	          Z
 G d d	e	          Z G d
 de	          Z G d dej                  Z G d de          Z G d de          Zedk    r ej                     dS dS )zTests for HTMLParser.py.    N)supportc                   \    e Zd ZdddZd Zd Zd Zd Zd Zd	 Z	d
 Z
d Zd Zd Zd ZdS )EventCollectorF	autocdatac                    || _         g | _        | j        j        | _        t          j        j        j        | g|R i | |r|                     d           d S d S )NF)r   eventsappendhtmlparser
HTMLParser__init___set_support_cdata)selfr   argskws       >/opt/python-3.11.14/usr/lib/python3.11/test/test_htmlparser.pyr   zEventCollector.__init__   sn    "k(':t:::r::: 	+##E*****	+ 	+    c                     g }d }| j         D ]N}|d         }||cxk    rdk    r n nd|d         d         |d         z   f|d<   n|                    |           |}O|| _         |S )Nr   data   )r	   r
   )r   Lprevtypeeventtypes        r   
get_eventszEventCollector.get_events   s     [ 	 	E8Dx))))6)))))2qE!H!45"HHr   c                     |                      d||f           | j        r|dk    r|                     d           d S d S d S )NstarttagsvgTr
   r   r   r   tagattrss      r   handle_starttagzEventCollector.handle_starttag%   sU    Ze,---> 	*cUll##D)))))	* 	*llr   c                 6    |                      d||f           d S )Nstartendtagr
   r"   s      r   handle_startendtagz!EventCollector.handle_startendtag*   s!    ]C/00000r   c                     |                      d|f           | j        r|dk    r|                     d           d S d S d S )Nendtagr    Fr!   )r   r#   s     r   handle_endtagzEventCollector.handle_endtag-   sR    XsO$$$> 	+cUll##E*****	+ 	+llr   c                 4    |                      d|f           d S )Ncommentr(   r   r   s     r   handle_commentzEventCollector.handle_comment4       Y%&&&&&r   c                 4    |                      d|f           d S )Ncharrefr(   r/   s     r   handle_charrefzEventCollector.handle_charref7   r1   r   c                 4    |                      d|f           d S )Nr   r(   r/   s     r   handle_datazEventCollector.handle_data:       VTN#####r   c                 4    |                      d|f           d S )Ndeclr(   r/   s     r   handle_declzEventCollector.handle_decl=   r7   r   c                 4    |                      d|f           d S )N	entityrefr(   r/   s     r   handle_entityrefzEventCollector.handle_entityref@   s    [$'(((((r   c                 4    |                      d|f           d S )Npir(   r/   s     r   	handle_pizEventCollector.handle_piC   s    T4L!!!!!r   c                 4    |                      d|f           d S )Nunknown declr(   )r   r9   s     r   unknown_declzEventCollector.unknown_declF   s    ^T*+++++r   N)__name__
__module____qualname__r   r   r%   r)   r,   r0   r4   r6   r:   r=   r@   rC    r   r   r   r   
   s        (- + + + + +  "* * *
1 1 1+ + +' ' '' ' '$ $ $$ $ $) ) )" " ", , , , ,r   r   c                       e Zd Zd ZdS )EventCollectorExtrac                     t                               | ||           |                     d|                                 f           d S )Nstarttag_text)r   r%   r
   get_starttag_textr"   s      r   r%   z#EventCollectorExtra.handle_starttagL   sC    &&tS%888_d&<&<&>&>?@@@@@r   N)rD   rE   rF   r%   rG   r   r   rI   rI   J   s(        A A A A Ar   rI   c                       e Zd Zd Zd ZdS )EventCollectorCharrefsc                 0    |                      d           d S Nz6This should never be called with convert_charrefs=Truefailr/   s     r   r4   z%EventCollectorCharrefs.handle_charrefS       		JKKKKKr   c                 0    |                      d           d S rP   rQ   r/   s     r   r=   z'EventCollectorCharrefs.handle_entityrefV   rS   r   N)rD   rE   rF   r4   r=   rG   r   r   rN   rN   Q   s:        L L LL L L L Lr   rN   c                       e Zd Zd ZdS )EventCollectorNoNormalizec                     | j         S N)r	   r   s    r   r   z$EventCollectorNoNormalize.get_events]   s
    {r   N)rD   rE   rF   r   rG   r   r   rV   rV   \   s#            r   rV   c                   "    e Zd Zd ZddZd ZdS )TestCaseBasec                 "    t          d          S NFconvert_charrefs)r   rY   s    r   get_collectorzTestCaseBase.get_collectorc   s    u5555r   Nc                 t   ||                                  }|}|D ]}|                    |           |                                 |                                }||k    rW|                     dt          |          z   dz   t          j        |          z   dz   t          j        |          z              d S d S )Nz6received events did not match expected events
Source:
z
Expected:
z
Received:
)r`   feedcloser   rR   reprpprintpformat)r   sourceexpected_events	collectorr   sr	   s          r   
_run_checkzTestCaseBase._run_checkf   s    **,,I 	 	AKKNNNN""$$_$$II $&*6ll3%&(.(G(GH && )/v(>(>? @ @ @ @ @ %$r   c                 P    |                      ||t          d                     d S r]   )rk   rI   )r   rg   r	   s      r   _run_check_extrazTestCaseBase._run_check_extrat   s9    +UCCC	E 	E 	E 	E 	Er   rX   )rD   rE   rF   r`   rk   rm   rG   r   r   r[   r[   a   sO        6 6 6@ @ @ @E E E E Er   r[   c                      e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Z ej        dg d          d             Z ej        dg d          d             Z ej        dg d          d             Z ej        dg d          d             Z ej        dg d          d             Z ej        dg d          d             Z ej        dg d          d             Z ej        dg d          d             Z ej        dg d           d!             Z ej        dg d"          d#             Zd$ Zd% Zd& Zd' Zd( Zd) Z d* Z!d+ Z"d, Z#d- Z$d. Z%d/ Z&d0 Z'd1 Z(d2 Z) ej        dg d3          d4             Z*d5 Z+d6 Z, ej        dg d7          d8             Z-d9 Z.d: Z/ ej0        d;          d<             Z1d=S )>HTMLParserTestCasec                 b    |                      ddg           |                      ddg           d S )Nz<?processing instruction>)r?   zprocessing instructionz<?processing instruction ?>)r?   zprocessing instruction ?rk   rY   s    r    test_processing_instruction_onlyz3HTMLParserTestCase.test_processing_instruction_only{   sP    3,6 	 	 	 	5.8 	 	 	 	 	r   c                 d    |                      ddddddg fddddddd	d
dgfdddddddg           d S )Nz
<!DOCTYPE html PUBLIC 'foo'>
<HTML>&entity;&#32;
<!--comment1a
-></foo><bar>&lt;<?pi?></foo<bar
comment1b-->
<Img sRc='Bar' isMAP>sample
text
&#x201C;
<!--comment2a-- --comment2b-->
</Html>
r   
)r9   zDOCTYPE html PUBLIC 'foo'r   r   )r<   entity)r3   32)r.   z4comment1a
-></foo><bar>&lt;<?pi?></foo<bar
comment1bimg)srcBar)ismapN)r   zsample
text
)r3   x201C)r.   zcomment2a-- --comment2br+   r   rq   rY   s    r   test_simple_htmlz#HTMLParserTestCase.test_simple_html   st      )I9:*#	 	 	 	 	r   c                 x    |                      dddg fddg           |                      dgddg fddg           d S )	Nz<p>&#bad;</p>r   p)r   z&#bad;r+   r   z<div>&#bad;</div>divr+   r   rq   rY   s    r   test_malformatted_charrefz,HTMLParserTestCase.test_malformatted_charref   sl    b!*
 	 	 	 	,-#0
 	 	 	 	 	r   c                 6    |                      dddg           d S )Nz&entityref foo)r<   r<   )r   z foorq   rY   s    r   test_unclosed_entityrefz*HTMLParserTestCase.test_unclosed_entityref   s0    (&+ 	 	 	 	 	r   c                 F    |                      dddg fddg fddg           d S )Nz<a><b></a></b>r   abr+   r   r+   r   rq   rY   s    r   test_bad_nestingz#HTMLParserTestCase.test_bad_nesting   sF     	(b!b!	+ 	 	 	 	 	r   c                 4    |                      ddg           d S )N#this text & contains & ampersands &)r   r   rq   rY   s    r   test_bare_ampersandsz'HTMLParserTestCase.test_bare_ampersands   s.    =;@ 	 	 	 	 	r   c                 4    |                      ddg           d S )N.this < text > contains < bare>pointy< brackets)r   r   rq   rY   s    r   test_bare_pointy_bracketsz,HTMLParserTestCase.test_bare_pointy_brackets   s.    HFK 	 	 	 	 	r   c                 r    |                      ddddgfg           |                      ddddgfg           d S )N	<a b='<'>r   r   r   <	<a b='>'>r   >rq   rY   s    r   test_starttag_end_boundaryz-HTMLParserTestCase.test_starttag_end_boundary   sH    :sZL*I)JKKK:sZL*I)JKKKKKr   c                 t   dddgfg}|                      dg|           |                      ddg|           |                      ddg|           |                      d	d
g|           |                      ddg|           |                      ddg|           dddgfg}|                      dg|           |                      ddg|           |                      ddg|           |                      d	dg|           |                      ddg|           |                      ddg|           dg}|                      ddg|           |                      ddg|           |                      ddg|           |                      ddg|           |                      dd g|           |                      d!d"g|           |                      d#d$g|           |                      d%d&g|           |                      d'd(g|           |                      d)dg|           |                      ddg|           d S )*Nr   r   r   r   <a zb='<'>z<a bz='<'>z<a b=z'<'>z<a b='<z'>z<a b='<'r   r   r   zb='>'>z='>'>z'>'>z<a b='>z<a b='>')r.   abc z
<!--abc-->r   z	!--abc--><!z--abc--><!-z-abc--><!--zabc-->z<!--azbc-->z<!--abzc-->z<!--abcz-->z<!--abc-z->z	<!--abc--rq   r   outputs     r   test_buffer_artefactsz(HTMLParserTestCase.test_buffer_artefacts   sk   sZL12v...)6222)6222&)6222D)6222S)6222sZL12v...)6222)6222&)6222D)6222S)6222$%\*F333k*F333z*F333	*F333*F333'*F3336*F333E*F333T*F333c*F333r*F33333r   c                 V    g d}|D ]!}|                      d|z  dd|z   fg           "d S )N)
HTMLzOHTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"z[HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"zbhtml PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"zfhtml PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"zYmath PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"zhtml PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"zWsvg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"z'html PUBLIC "-//IETF//DTD HTML 2.0//EN"z,html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"z<!DOCTYPE %s>r9   DOCTYPE rq   )r   dtdsdtds      r   test_valid_doctypesz&HTMLParserTestCase.test_valid_doctypes   sc    @ @ @$  	: 	:COOOc1$j3&678: : : :	: 	:r   c                     |                      dddg fg           |                      dddg fdg           |                      dddg fddd	gfdg           d S )
Nz<p/>r'   r   z<p></p>r   r   z<p><img src='foo' /></p>rx   )ry   foorq   rY   s    r   test_startendtagz#HTMLParserTestCase.test_startendtag  s    C$! 	 	 	 		b!$ 	 	 	 	2b!EN#345 	 	 	 	 	r   c                 H    d}|                      |ddddgfd|fg           d S )Nz <foo:bar   
   one="1"	two=2   >r   zfoo:bar)one1)two2rK   )rm   )r   rj   s     r   test_get_starttag_textz)HTMLParserTestCase.test_get_starttag_text  sG    4a\<$@Aa "" 	# 	# 	# 	# 	#r   content)z*<!-- not a comment --> &not-an-entity-ref;<not a='start tag'>z<a href="" /> <p> <span></span>zfoo = "</scr" + "ipt>";zfoo = "</SCRIPT" + ">";zfoo = <
/script> z*<!-- document.write("</scr" + "ipt>"); -->z
//<![CDATA[
document.write('<s'+'cript type="text/javascript" src="http://www.example.org/r='+new Date().getTime()+'"><\/s'+'cript>');
//]]>z 
<!-- //
var foo = 3.14;
// -->
u   <!-- ☃ -->zfoo = "</ script>"zfoo = "</scripture>"zfoo = "</script>"u   foo = "</script >"u   foo = "</ſcript>"u   foo = "</scrıpt>"c                 N    d| d}|                      |ddg fd|fdg           d S )Nz<script>z	</script>r   scriptr   r+   r   rq   r   r   rj   s      r   test_script_contentz&HTMLParserTestCase.test_script_content  sO    , *w)))Z26#W-02 	3 	3 	3 	3 	3r   )
z0a::before { content: "<!-- not a comment -->"; }z-a::before { content: "&not-an-entity-ref;"; }z-a::before { content: "<not a='start tag'>"; }u   a::before { content: "☃"; }z#a::before { content: "< /style>"; }z#a::before { content: "</ style>"; }z#a::before { content: "</styled>"; }z#a::before { content: "</style>"; }u$   a::before { content: "</style >"; }u#   a::before { content: "</ſtyle>"; }c                 N    d| d}|                      |ddg fd|fdg           d S )Nz<style>z</style>r   styler   r+   r   rq   r   s      r   test_style_contentz%HTMLParserTestCase.test_style_content4  sO     (g'''Z"5#W-/1 	2 	2 	2 	2 	2r   )<!-- not a comment -->r   <![CDATA[not a cdata]]><!not a bogus comment></not a bogus comment>   ☃z	< /title>z	</ title>z	</titled>z	</title>u
   </title >u	   </tıtle>c                 N    d| d}|                      |ddg fd|fdg           d S )Nz<title>z</title>r   titler   r+   r   rq   r   r   rg   s      r   test_title_contentz%HTMLParserTestCase.test_title_contentF  sP     -7,,,"%W!
 	 	 	 	 	r   )r   r   r   r   r   r   z< /textarea>z</ textarea>z</textareable>z</textarea>u   </textarea >c                 N    d| d}|                      |ddg fd|fdg           d S )Nz
<textarea>z</textarea>r   textarear   r+   r   rq   r   s      r   test_textarea_contentz(HTMLParserTestCase.test_textarea_content\  sP     3g222R(W"!
 	 	 	 	 	r   r+   )r   SCRIPTzscript zscript
zscript/zscript foo=barzscript foo=">"c                 x    d}d| d| d}|                      |ddg fd|fdgt          d	
                     d S )Nz<!-- not a comment --> &not-an-entity-ref;
                  <a href="" /> </p><p> <span></span></style>
                  '</script' + '>'<ScrIPt></r   r   r   r   r   Fr^   ri   rk   rV   r   r+   r   rj   s       r   test_script_closing_tagz*HTMLParserTestCase.test_script_closing_tagq  sx    
& ,w++&+++Z26#W-02 #<U"S"S"S 	 	U 	U 	U 	U 	Ur   )r   STYLEzstyle zstyle
zstyle/zstyle foo=barzstyle foo=">"c                 x    d}d| d| d}|                      |ddg fd|fdgt          d	
                     d S )Nz
            b::before { content: "<!-- not a comment -->"; }
            p::before { content: "&not-an-entity-ref;"; }
            a::before { content: "<i>"; }
            a::after { content: "</i>"; }
            z<StyLE>r   r   r   r   r   r   Fr^   r   r   r   s       r   test_style_closing_tagz)HTMLParserTestCase.test_style_closing_tag  sx     +g*****Z"5#W-/1 #<U"S"S"S 	 	U 	U 	U 	U 	Ur   )r   TITLEztitle ztitle
ztitle/ztitle foo=barztitle foo=">"c                     d}d| d| d}|                      |ddg fddgt          d	
                     |                      |ddg fddddgt          d
                     d S )N+<!-- not a comment --><i>Egg &amp; Spam</i>z<TitLe>r   r   r   r   r   z'<!-- not a comment --><i>Egg & Spam</i>r   Tr^   r   r   z<!-- not a comment --><i>Egg r<   ampr   z	 Spam</i>Fr   r   s       r   test_title_closing_tagz)HTMLParserTestCase.test_title_closing_tag  s     @*g*****Z"5O/1 #<T"R"R"R 	 	T 	T 	T 	Z"5E01/	1
 #<U"S"S"S 	 	U 	U 	U 	U 	Ur   )r   TEXTAREAz	textarea z	textarea
z	textarea/ztextarea foo=barztextarea foo=">"c                     d}d| d| d}|                      |ddg fddgt          d	
                     |                      |ddg fddddgt          d
                     d S )Nr   z
<TexTarEa>r   r   r   r   r   r   Tr^   r   r   r   r   Fr   r   s       r   test_textarea_closing_tagz,HTMLParserTestCase.test_textarea_closing_tag  s     @---F---ZR8O24 #<T"R"R"R 	 	T 	T 	T 	ZR8E012	4
 #<U"S"S"S 	 	U 	U 	U 	U 	Ur   ztail,end)r   Fr   Fr   F)z</sF)z</scriptF)z	</script T)z</script foo=barT)z</script foo=">Tc                     d}d| | }|                      |ddg fd|r|n||z   fgt          d                     d S )	Nza = 123r   r   r   r   Fr^   r   r   )r   tailendr   rj   s        r   test_eof_in_scriptz%HTMLParserTestCase.test_eof_in_script  sy     &w&&&Z26#%GWW4HJ";U"S"S"S 	 	U 	U 	U 	U 	Ur   )r   r   r   )z</tF)z</titleF)z</title T)z</title foo=barT)z</title foo=">Tc           	          d| }|                      |ddg fdd|rdn|z   fgt          d          	           |                      |ddg fd
ddd|rdn|z   fgt          d          	           d S )Nz<TitLe>Egg &amp; Spamr   r   r   z
Egg & Spamr   Tr^   r   )r   zEgg r   z SpamFr   )r   r   r   rj   s       r   test_eof_in_titlez$HTMLParserTestCase.test_eof_in_title  s     +D**Z"5#\35HRRD%IJL";T"R"R"R 	 	T 	T 	T 	Z"5,0#Wc0Ct%DEG #<U"S"S"S	 	 	U 	U 	U 	U 	Ur   c                 >    d}g d}|                      ||           d S )Na.  <!-- I'm a valid comment --><!--me too!--><!------><!-----><!----><!---><!--><!----I have many hyphens----><!-- I have a > in the middle --><!-- and I have -- in the middle! --><!--incorrectly-closed-comment--!><!----!><!----!--><!---- >--><!---!>--><!--!>--><!-- <!-- nested --> --><!--<!--><!--<!--!>))r.   z I'm a valid comment )r.   zme too!r.   z--r.   -r.   r   r   r   )r.   z--I have many hyphens--)r.   z I have a > in the middle )r.   z and I have -- in the middle! )r.   zincorrectly-closed-commentr   )r.   z--!)r.   z-- >r.   z-!>)r.   z!>)r.   z <!-- nested )r   z -->r.   r   r   rq   r   r   expecteds      r   test_commentsz HTMLParserTestCase.test_comments  s6    ,
 
 
( 	h'''''r   c                 >    d}g d}|                      ||           d S )Nzs<!--[if IE & !(lte IE 8)]>aren't<![endif]--><!--[if IE 8]>condcoms<![endif]--><!--[if lte IE 7]>pretty?<![endif]-->))r.   z%[if IE & !(lte IE 8)]>aren't<![endif])r.   z[if IE 8]>condcoms<![endif])r.   z[if lte IE 7]>pretty?<![endif]rq   r   s      r   test_condcomsz HTMLParserTestCase.test_condcoms  s9    8C C C 	h'''''r   c           
         d }|                       |            j                   g d}dddgfddg}|D ]5}|                     d                    |          | |            	           6d
ddg dfd
dd
g}|D ]5}|                     d                    |          | |            	           6|D ]d}d                    |gdz            }d
ddg fd|fdd
ddg fd|fdd
g	}|                     d                    ||          | |            	           ed}t          dt          |                    D ]5}|                     |d |         d|d |         fg |            	           6|                     ddg |            	           d S )Nc                      t                      S rX   )rN   rG   r   r   <lambda>z:HTMLParserTestCase.test_convert_charrefs.<locals>.<lambda>  s    244 r   )z&quot;z&#34;z&#x22;z&quotz&#34z&#x22r   r   )hrefzfoo"zar)r   za"zr   z<a href="foo{0}zar">a{0}z</a>r   )r   "))xr   )yz"X)zzX"z*{0}<a x="{0}" y="{0}X" z="X{0}">{0}</a>{0}X   r   r   r   r   r   z/{1}<script>{0}</script>{1}<style>{0}</style>{1}z&quo &# &#xr   no charrefs here)r   r  )
assertTruer_   rk   formatjoinrangelen)r   ri   charrefsr   r3   textr   r   s           r   test_convert_charrefsz(HTMLParserTestCase.test_convert_charrefs
  sG   44			4555JJJ':&;<#_6 	= 	=GOO;BB7KK$		  = = = = "&L&L&LM!?MC   	= 	=GOO ))/$		  = = = =   		= 		=G88WIaK((D%#Xr2VTN,m#Wb1FD>+]	<H
 OO 44:F44I4I$		  = = = = q#d))$$ 	3 	3AOOD!HRaR'9&:&/ikk  3 3 3 3 	*-I,J"+)++ 	 	/ 	/ 	/ 	/ 	/r   c           
      P    |                      ddddgfdddddd	gfd
dg           d S )NzF<html <html>te>>xt&a<<bc</a></html>
<img src="URL><//img></html</html>r   r   )z<htmlN)r   zte>>xt)r<   r   r   r   zbc<)r   Nr}   rt   rq   rY   s    r   test_tolerant_parsingz(HTMLParserTestCase.test_tolerant_parsing3  sT     ='/1BC..)'>.*?,	- 	- 	- 	- 	-r   c                    |                      ddg           |                      ddg           |                      ddg           |                      ddg           |                      d	g            |                      d
dg           |                      ddg           |                      ddg           |                      dg            |                      ddg           |                      ddg           |                      dddg fg           |                      ddg           |                      ddg           |                      dg            |                      dg            |                      dg            |                      dg            |                      dg            |                      d dd!g fg           |                      d"dd#g fg           |                      d$d%d#g fg           |                      d&dd#g fg           |                      d'd%d#g fg           |                      d(d)g           d S )*Nr   r  <>)r   r  < >)r   r  < )r   r  z</><$>)r   r  z</$>)r.   $r   )r   r   z</az</ a>)r.   z az</ az<a<a>r   a<az</a<a>)r+   r  r   r   z<az<a foo='bar'z<a foo='barz
<a foo='>'z	<a foo='>z<a$>za$z<a$b>a$bz<a$b/>r'   z<a$b  >z<a$b  />z</a$b>)r+   r  rq   rY   s    r   test_starttag_junk_charsz+HTMLParserTestCase.test_starttag_junk_chars>  s`   m_---~.///0111~.///r"""0111!1 2333~.///r""""3!4555!2 3444:ub"9!:;;;#4"5666/000b!!!+++r***b)))R(((*dB!7 8999:ub"9!:;;;M5"#=">???	Z$;#<===
mUB%?$@AAA#4"566666r   c           
      <   |                      ddddgfg           d}ddg dfg}|                      ||           d}ddg d	fd
dg d	fg}|                      ||           d}d
dg fd
dg fd
dg fd
dg fddg fddg fddg fddg fg}|                      ||           d S )Nz<a foo="var"/>r'   r   )r   varzj<img width=902 height=250px src="/sites/default/files/images/homepage/foo.jpg" /*what am I doing here*/ />rx   ))width902)height250px)ry   z,/sites/default/files/images/homepage/foo.jpg)z*whatN)amN)iN)doingN)zhere*Nz9<a / /foo/ / /=/ / /bar/ / /><a / /foo/ / /=/ / /bar/ / >))r   N)=N)barNr   zD<meta><meta / ><meta // ><meta / / ><meta/><meta /><meta //><meta//>metarq   r   s      r   test_slashes_in_starttagz+HTMLParserTestCase.test_slashes_in_starttagY  s#   (M3@P+Q*RSSS. 5/ / /
  	h'''/ C!L!L!LMIIIJ
 	h'''3 $z62&>$z62&>FB'-)DFB'-)D	
 	h'''''r   c                 b    |                      ddg           |                      ddg           d S )Nz</a/>r   z</a foo="var"/>rq   rY   s    r   test_slashes_in_endtagz)HTMLParserTestCase.test_slashes_in_endtagx  s7    /!2333)O+<=====r   c                 4    |                      ddg           d S )Nz<!DOCTYPE foo $ >)r9   zDOCTYPE foo $ rq   rY   s    r   test_declaration_junk_charsz.HTMLParserTestCase.test_declaration_junk_chars|  s!    +.H-IJJJJJr   c                 4    |                      ddg           d S )Nz"<!spacer type="block" height="25">)r.   zspacer type="block" height="25"rq   rY   s    r   test_illegal_declarationsz,HTMLParserTestCase.test_illegal_declarations  s0    <GH	J 	J 	J 	J 	Jr   c                 |    d}ddg fdddg fdddg fdddg fdddg fdddg fddg fg}|                      ||           d S )Nzn<br></label</p><br></div end tmAd-leaderBoard><br></<h4><br></li class="unit"><br></li
						</ul><br></><br>r   br)r+   zlabel<r   )r.   z<h4)r+   lirq   r   s      r   test_invalid_end_tagsz(HTMLParserTestCase.test_invalid_end_tags  s    Mr*(r*%r*&r*$r*$r*r*#,$ 	h'''''r   c                 H    d}ddg fdddg}|                      ||           d S )Nz(<b>This</b attr=">"> confuses the parserr   r   )r   Thisr   )r   z confuses the parserrq   r   s      r   test_broken_invalid_end_tagz.HTMLParserTestCase.test_broken_invalid_end_tag  s=    9b)$#46 	h'''''r   c                     d}dddgfddg fddddgfd	d
dddg fdddg fddddg}|                      ||           d}ddg dfddg fddddgfd	d
g}|                      ||           d S )Nz[<div style=""    ><b>The <a href="some_url">rain</a> <br /> in <span>Spain</span></b></div>r   r   r   r   r   )r   zThe r   )r   some_url)r   rainr   r    r'   r,  )r   z in span)r   Spainr+   r8  r   r   z><div style="", foo = "bar" ><b>The <a href="some_url">rain</a>)r3  ,Nr   r"  rq   r   s      r   $test_correct_detection_of_start_tagsz7HTMLParserTestCase.test_correct_detection_of_start_tags  s    9 0b!345D"%$
  	h'''O L L LMb!345
 	h'''''r   c                 j    ddgfddgfdg dfddd	gfg}|D ]\  }}|                      ||           d S )
Na&)r   r@  za&b)r   abza&b )r   r   r<   r   r6  za&b;rB  rC  rq   r   r   r   r   s       r   test_EOF_in_charrefz&HTMLParserTestCase.test_EOF_in_charref  st     N#$^$%GGGHm%789	
 # 	, 	,ND(OOD(++++	, 	,r   c                     ddgfddgfddgfddgfddgfd	dgfd
dgfddgfddgfddgfddgfddgfddgfddgfg}|D ]\  }}|                      ||           d S )Nr   r   z<!---z<!----z<!-----r   z<!------r   z<!----!z<!---!)r.   z-!z<!---!>r   z<!--foo)r.   r   z<!--foo-z	<!--foo--z
<!--foo--!z<!--<!--r   z	<!--<!--!rq   rD  s       r   test_eof_in_commentsz'HTMLParserTestCase.test_eof_in_comments  s    o&''(())*++,-)*)*++,-+,-,-.-././0+,-,-.
  # 	, 	,ND(OOD(++++	, 	,r   c                     ddgfddgfddgfddgfd	d
gfddgfddgfddgfddgfddgfg
}|D ]\  }}|                      ||           d S )Nr   r   r   r   z<![)r.   [z	<!DOCTYPE)r9   DOCTYPEz
<!DOCTYPE )r9   r   z<!DOCTYPE html)r9   zDOCTYPE htmlz<!DOCTYPE html )r9   zDOCTYPE html z<!DOCTYPE html PUBLIC)r9   zDOCTYPE html PUBLICz<!DOCTYPE html PUBLIC "foo)r9   zDOCTYPE html PUBLIC "fooz6<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo)r9   z4DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foorq   rD  s       r   test_eof_in_declarationsz+HTMLParserTestCase.test_eof_in_declarations  s    O$%%&'%&'./0012 89:!: ;<$'F&GH),P+QRENOQ
 # 	, 	,ND(OOD(++++	, 	,r   )r   r   zx]zx]]c                     |                      d|z   dd|z   fg           |                      d|z   dd|z   fgt          d                     |                      d	|z   d
dg fd
ddgfdd|z   fg           d S )Nz	<![CDATA[rB   CDATA[r.   z[CDATA[Tr   r   <svg><text y="100"><![CDATA[r   r    r
  r   100rk   r   )r   r   s     r   test_eof_in_cdataz$HTMLParserTestCase.test_eof_in_cdata  s    g-((W*<=>	@ 	@ 	@g-#Y%89:"04"@"@"@ 	 	B 	B 	B 	6@$eR0$f|n=((W*<=?	@ 	@ 	@ 	@ 	@r   c                 >    d}g d}|                      ||           d S )Nz<!ELEMENT br EMPTY><! not really a comment ><! not a comment either --><! -- close enough --><!><!<-- this was an empty comment><!!! another bogus comment !!!>))r.   zELEMENT br EMPTY)r.   z not really a comment )r.   z not a comment either --)r.   z -- close enough --r   )r.   z<-- this was an empty comment)r.   z!! another bogus comment !!!rq   r   s      r   test_bogus_commentsz&HTMLParserTestCase.test_bogus_comments  s6    2
 
 
 	h'''''r   c                 z    d}dddddddgfdd	dd
dgfddddg fdddddd
dgfdg}|                      ||           d S )Nz<![if !(IE)]>broken condcom<![endif]><![if ! IE]><link href="favicon.tiff"/><![endif]><![if !IE 6]><img src="firefox.png" /><![endif]><![if !ie 6]><b>foo</b><![endif]><![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>)rB   zif !(IE))r   zbroken condcom)rB   endif)rB   zif ! IEr'   link)r   zfavicon.tiff)rB   zif !IE 6rx   )ry   zfirefox.png)rB   zif !ie 6r   r   )r   r   r   )rB   zif (!IE)|(lt IE 9))ry   zmammoth.bmprq   r   s      r   test_broken_condcomsz'HTMLParserTestCase.test_broken_condcoms  s    M )&%'F%=$>?%(E$:#;<%(b!%2E$:#;<%#
( 	h'''''r   )zjust some plain textr   z&not-an-entity-ref;r   r   z[[I have many brackets]]zI have a > in the middlezI have a ]] in the middlez] ]>z]] >zN
    if (a < b && a > b) {
        printf("[<marquee>How?</marquee>]");
    }
c                     d| d}ddg fdddgfdd|z   fd	d
g}|                      ||           |                      ||t          d                     d S )NrN  z]]></text></svg>r   r    r
  rO  rB   rM  r+   r
  r+   r    Tr   r   rQ  )r   r   r   r   s       r   test_cdata_section_contentz-HTMLParserTestCase.test_cdata_section_content9  s    & HgGGG#,0X/0
 	h'''h.42P2P2PQQQQQr   c           	      z    d}ddddg fdddgfdd	d
ddg	}|                      ||t          d                     d S )Nzb<![CDATA[foo<br>bar]]><svg><text y="100"><![CDATA[foo<br>bar]]></text></svg><![CDATA[foo<br>bar]]>)r.   z[CDATA[foo<br)r   zbar]]>r   r    r
  rO  )rB   zCDATA[foo<br>barrZ  r[  Tr   r   rQ  r   s      r   test_cdata_sectionz%HTMLParserTestCase.test_cdata_sectionW  sh    ) )#,00(

 	h.42P2P2PQQQQQr   c                     t          d          }|                    d           |                     |                                dddg fddd	g           d S )
NTr^   zfoo <a>link</a> bar &amp; baz)r   zfoo r   r   )r   rW  r   )r   z
 bar & baz)r   rb   assertEqualr   )r   r   s     r   "test_convert_charrefs_dropped_textz5HTMLParserTestCase.test_convert_charrefs_dropped_texti  sr      6663444
C46F46	
 	
 	
 	
 	
r   cpuc                 (   d }d} |d|z              |d|z              |d|z              |d|z              |d|z              |d|z              |d	|z              |d
|z              |d|z              |d|z             d S )Nc                     t           j                                        }|                    |            |                                 d S rX   )r   r   r   rb   rc   )rg   r   s     r   checkzBHTMLParserTestCase.test_eof_no_quadratic_complexity.<locals>.checky  s8    [++--FKKLLNNNNNr   i r   z<a a=z8</a </a </a </a </a </a </a </a </a </a </a </a </a </a zB</a a=</a a=</a a=</a a=</a a=</a a=</a a=</a a=</a a=</a a=</a a=z<!--<!--<!--<!--zx<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!<!z&<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?<?z-</$</$</$</$</$</$</$</$</$</$</$</$</$</$</$zQ<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[<![CDATA[a;  <!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctype<!doctyperG   )r   re  ns      r    test_eof_no_quadratic_complexityz3HTMLParserTestCase.test_eof_no_quadratic_complexityu  s    	 	 	 eaigkkAoma   j1ni!mi!mj1no!""""#####r   N)2rD   rE   rF   rr   r~   r   r   r   r   r   r   r   r   r   r   r   subTestsr   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r$  r&  r(  r*  r.  r1  r>  rE  rG  rK  rR  rT  rX  r\  r^  ra  requires_resourcerg  rG   r   r   ro   ro   y   s/           @    	 	 	  
  
L L L4 4 4<: : :0  # # # Wi "
 "
 "
  *3 3+ *3 Wi "
 "
 "
  2 2 2 Wi "
 "
 "
     Wi "
 "
 "
     Wh !P !P !P Q Q
U 
UQ Q
U Wh !M !M !M N NU UN NU Wh !M !M !M N NU UN NU Wh !V !V !V W WU UW WU Wj 	# 	# 	# 	 	U U	 	U Wj 	# 	# 	# 	 		U 	U	 		U+( +( +(Z( ( (%/ %/ %/R	- 	- 	-7 7 76( ( (>> > >K K KJ J J( ( (4( ( (( ( (B, , ,, , ,(, , ," Wi!7!7!788	@ 	@ 98	@( ( ($ (  (  (D Wi " " "   R R!  RR R R$

 

 

 Wu%%$ $ &%$ $ $r   ro   c                   t    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd ZdS )AttributesTestCasec                 (   ddg dfg}|                      d|           |                      ddddgfg           |                      ddddgfg           |                      d	dddgfg           |                      d
dddgfg           |                      ddddgfg           |                      ddddgfg           |                      ddddgfg           |                      ddddgfg           |                      ddddgfg           d S )Nr   r   ))r   v)crm  )drm  )eNz<a b='v' c="v" d=v e>z<a foo==bar>)r   z=barz<a foo =bar>r=  z<a foo	=bar>z<a foo=bar>)zfoor"  u   <a foo =bar>)u   foo r"  z<a foo= bar>z<a foo=	bar>z<a foo=bar>)r   zbaru   <a foo= bar>)r   u    barrq   r   s     r   test_attr_syntaxz#AttributesTestCase.test_attr_syntax  sf   sMMM
N
 	3V<<<*cO;L)M(NOOO*cN;K)L(MNNN:s^<L*M)NOOO:s=M<N*O)PQQQ)Z?Q>R,S+TUUU*cN;K)L(MNNN:s^<L*M)NOOO:s=M<N*O)PQQQ)Z?Q>R,S+TUUUUUr   c                 R   |                      dddg dfg           |                      dddddgfg           |                      dddd	d
gfg           |                      dddddgfg           |                      ddddgfg           |                      ddddgfg           d S )Nz'<a b='xxx
	xxx' c="yyy	
yyy" d='	xyz
'>r   r   ))r   zxxx
	xxx)rn  zyyy	
yyy)ro  z	xyz
z<a b='' c="">)r   r   )rn  r   z<a b=	x c=
y>)r   r   )rn  r   u   <a b= c= >)r   )rn      z<e a=rgb(1,2,3)>rp  )r   z
rgb(1,2,3)z<a href=mailto:xyz@example.com>)r   zmailto:xyz@example.comrq   rY   s    r   test_attr_valuesz#AttributesTestCase.test_attr_values  s3   K$c ,> ,> ,> ? @	A 	A 	A 	+$cIy+ABC	E 	E 	E)$cJ
+CDE	G 	G 	G)$cK+GHI	K 	K 	K 	*$c,?+@AB	D 	D 	D 	-# BCDE	G 	G 	G 	G 	Gr   c                     |                      dddddgfg           |                      ddddd	gfg           |                      d
dddd	gfg           d S )Nu!   <img src=/foo/bar.png alt=中文>r   rx   )ry   z/foo/bar.png)altu   中文u+   <a title='テスト' href='テスト.html'>r   )r   u	   テスト)r   u   テスト.htmlu+   <a title="テスト" href="テスト.html">rq   rY   s    r   test_attr_nonasciiz%AttributesTestCase.test_attr_nonascii  s    5%"9"9"; < =	> 	> 	> 	K# ? C E F G	H 	H 	H 	K# ? C E F G	H 	H 	H 	H 	Hr   c                 <    |                      ddddgfg           d S )Nz!<a b='&amp;&gt;&lt;&quot;&apos;'>r   r   )r   z&><"'rq   rY   s    r   test_attr_entity_replacementz/AttributesTestCase.test_attr_entity_replacement  s5    /#012	4 	4 	4 	4 	4r   c                 >    |                      dddg dfg           d S )Nz<a a.b='v' c:d=v e-f=v>r   r   ))za.brm  )zc:drm  )ze-frm  rq   rY   s    r   test_attr_funky_namesz(AttributesTestCase.test_attr_funky_names  s>    %#IIIJK	M 	M 	M 	M 	Mr   c                 <    |                      ddddgfg           d S )Nz0<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>r   r   )r   u   €&aa&unsupported;rq   rY   s    r   test_entityrefs_in_attributesz0AttributesTestCase.test_entityrefs_in_attributes  s;    >&#D"EFG	I 	I 	I 	I 	Ir   c                 P    |                      ddddgfdddgfdddgfg           d S )	Nz<a $><b $=%><c \=/>r   r   )r  Nr   )r  %rn  )\/rq   rY   s    r   test_attr_funky_names2z)AttributesTestCase.test_attr_funky_names2  sN    "#}-#
|,#}-/	0 	0 	0 	0 	0r   c                     dD ]\}|                      d|z  dddgfg           |                      d|z  dddgfg           |                      d|z  dddgfg           ]d S )N)&z&amp;z&#38;z&#x26;z<a href="%s">r   r   )r   r  z<a href='%s'>z<a href=%s>rq   )r   rv   s     r    test_entities_in_attribute_valuez3AttributesTestCase.test_entities_in_attribute_value  s    7 	B 	BFOOOf4(#?@B B BOOOf4(#?@B B BOOMF2(#?@B B B B	B 	Br   c                 r    d}dddgfdddddgfddddd	gfd
ddddgfddg}|                      ||           d S )Nz<a href=test'style='color:red;bad1'>test - bad1</a><a href=test'+style='color:red;ba2'>test - bad2</a><a href=test'&nbsp;style='color:red;bad3'>test - bad3</a><a href = test'&nbsp;style='color:red;bad4'  >test - bad4</a>r   r   )r   ztest'style='color:red;bad1')r   ztest - bad1r   )r   ztest'+style='color:red;ba2')r   ztest - bad2)r   u   test' style='color:red;bad3')r   ztest - bad3)r   u   test' style='color:red;bad4')r   ztest - bad4rq   r   s      r   test_malformed_attributesz,AttributesTestCase.test_malformed_attributes  s    L 	 FGH#FGH#JKL#JKL#
 	h'''''r   c                     |                      dddg fddddgfdg           |                      d	ddg fdddd
gfdg           d S )Nz<x><y z=""o"" /></x>r   r   r'   r   )r   r   )zo""N)r+   r   z<x><y z="""" /></x>)z""Nrq   rY   s    r   "test_malformed_adjacent_attributesz5AttributesTestCase.test_malformed_adjacent_attributes  s    .$c2.*C)]1KL+-	. 	. 	. 	-$c2.*C)\1JK+-	. 	. 	. 	. 	.r   c                 v    |                      dddddgfg           |                      dddddgfg           d S )	Nz<a width="100%"cellspacing=0>r   r   r  z100%cellspacing0z<a id="foo"class="bar">)idr   classr"  rq   rY   s    r   test_adjacent_attributesz+AttributesTestCase.test_adjacent_attributes  st    7$c,.ABD E	F 	F 	F 	1$c(/:< =	> 	> 	> 	> 	>r   c                 <    |                      ddddgfg           d S )Nz<a v=>r   r   )rm  r   rq   rY   s    r   test_missing_attribute_valuez/AttributesTestCase.test_missing_attribute_value  s3    $cI;78	: 	: 	: 	: 	:r   c                 <    |                      ddddgfg           d S )Nz-<a href=javascript:popup('/popup/help.html')>r   r   )r   z$javascript:popup('/popup/help.html')rq   rY   s    r   test_javascript_attribute_valuez2AttributesTestCase.test_javascript_attribute_value  sA    G$cKLN O	P 	P 	P 	P 	Pr   c                 @    |                      ddddgfddg           d S )Nz-<a href='http://www.example.org/">;'>spam</a>r   r   )r   zhttp://www.example.org/">;)r   spamr   rq   rY   s    r   test_end_tag_in_attribute_valuez2AttributesTestCase.test_end_tag_in_attribute_value  s@    H$cBCE)?<	= 	= 	= 	= 	=r   c                     d}ddg fddddgfddg dfdd	g fdd
dgfdddgfddddgfdddgfdddddddgfdddgfddddg}|                      ||           d S )Nz<html><body bgcolor=d0ca90 text='181008'><table cellspacing=0 cellpadding=1 width=100% ><tr><td align=left><font size=-1>- <a href=/rabota/><span class=en> software-and-i</span></a>- <a href='/1/'><span class=en> library</span></a></table>r   r   body)bgcolord0ca90)r
  181008table)r  )cellpaddingr   r  trtd)alignleftfont)sizez-1)r   z- r   )r   z/rabota/r8  )r  en)r   z software-and-ir:  r   )r   z/1/)r   z library)r+   r  rq   r   s      r   test_with_unquoted_attributesz0AttributesTestCase.test_with_unquoted_attributes  s    M $"79K!LMOOOQr" 123.!12Z/C.DE/!235PZ.?@/!235I1D
 	h'''''r   c                     d}dddgfddddgfddddgfddg d	fdddd
gfddddgfddddgfddddgfddddgfg	}|                      ||           d S )Nz<div class=bar,baz=asd><div class="bar",baz="asd"><div class=bar, baz=asd,><div class="bar", baz="asd",><div class="bar",><div class=,bar baz=,asd><div class=,"bar" baz=,"asd"><div ,class=bar ,baz=asd><div class,="bar" baz,="asd">r   r   )r  zbar,baz=asdr  )z,bazasd)r  zbar,)bazzasd,)r  r;  )r  r  r;  r;  )r  z,bar)r  z,asd)r  z,"bar")r  z,"asd")z,classr"  )zclass,r"  )zbaz,r  rq   r   s      r   test_comma_between_attributesz0AttributesTestCase.test_comma_between_attributes3  s    0 !9 ;<!1? CD!2O DE !> !> !> ?!1; ?@!2O DE!46G HI!2O DE!2O DE
 	h'''''r   c                 <    |                      ddddgfg           d S )Nz<form action=bogus|&#()value>r   form)actionzbogus|&#()valuerq   rY   s    r   -test_weird_chars_in_unquoted_attribute_valuesz@AttributesTestCase.test_weird_chars_in_unquoted_attribute_valuesO  sA    7'!> ?A:B 	C 	C 	C 	C 	Cr   N)rD   rE   rF   rq  ru  rx  rz  r|  r~  r  r  r  r  r  r  r  r  r  r  r  rG   r   r   rk  rk    s-       V V VG G G&H H H4 4 4
M M M
I I I0 0 0B B B( ( (0	. 	. 	.> > >: : :P P P
= = =( ( (0( ( (8C C C C Cr   rk  __main__)__doc__html.parserr   re   unittesttestr   r   r   r   rI   rN   rV   TestCaser[   ro   rk  rD   mainrG   r   r   <module>r     s                =, =, =, =, =,T[+ =, =, =,@A A A A A. A A AL L L L L^ L L L       
E E E E E8$ E E E0N$ N$ N$ N$ N$ N$ N$ N$bHC HC HC HC HC HC HC HCT zHMOOOOO r   