a
    #Rîh!P  ã                   @   sÞ   d Z ddlZddlZddlmZ dgZe d¡Ze d¡Ze d¡Z	e d¡Z
e d	¡Ze d
¡Ze d¡Ze d¡Ze d¡Ze d¡Ze dej¡Ze dej¡Ze dej¡Ze d¡Ze d¡ZG dd„ dejƒZdS )zA parser for HTML and XHTML.é    N)ÚunescapeÚ
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]ú>z--!?>z-?>z0([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*a{  
  (
    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
   )
  ([\t\n\r\f ]*=[\t\n\r\f ]*        # value indicator
    ('[^']*'                        # LITA-enclosed value
    |"[^"]*"                        # LIT-enclosed value
    |(?!['"])[^>\t\n\r\f ]*         # bare value
    )
   )?
  (?:[\t\n\r\f ]|/(?!>))*           # possibly followed by a space
a  
  [a-zA-Z][^\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:[\t\n\r\f ]*=[\t\n\r\f ]*    # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   @   sþ   e Zd ZdZdZdZddœdd„Zdd	„ Zd
d„ Zdd„ Z	dZ
dd„ Zddœdd„Zdd„ Zd>dd„Zdd„ Zdd„ Zd?dd„Zd@d d!„Zd"d#„ Zd$d%„ Zd&d'„ Zd(d)„ Zd*d+„ Zd,d-„ Zd.d/„ Zd0d1„ Zd2d3„ Zd4d5„ Zd6d7„ Zd8d9„ Zd:d;„ Zd<d=„ Z dS )Ar   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )ZscriptÚstyle)ZtextareaÚtitleT)Úconvert_charrefsc                C   s   || _ |  ¡  dS )zÆInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r   Úreset)Úselfr   © r
   ú3/opt/python-3.9.24/usr/lib/python3.9/html/parser.pyÚ__init__s   s    zHTMLParser.__init__c                 C   s4   d| _ d| _t| _d| _d| _d| _tj 	| ¡ dS )z1Reset this instance.  Loses all unprocessed data.Ú z???NT)
ÚrawdataÚlasttagÚinteresting_normalÚinterestingÚ
cdata_elemÚ_support_cdataÚ
_escapableÚ_markupbaseÚ
ParserBaser   ©r	   r
   r
   r   r   |   s    zHTMLParser.resetc                 C   s   | j | | _ |  d¡ dS )z‘Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   Úgoahead©r	   Údatar
   r
   r   Úfeed†   s    zHTMLParser.feedc                 C   s   |   d¡ dS )zHandle any buffered data.é   N)r   r   r
   r
   r   Úclose   s    zHTMLParser.closeNc                 C   s   | j S )z)Return full source of start tag: '<...>'.)Ú_HTMLParser__starttag_textr   r
   r
   r   Úget_starttag_text•   s    zHTMLParser.get_starttag_textF©Ú	escapablec                C   sX   |  ¡ | _|| _|r8| js8t d| j tjtjB ¡| _nt d| j tjtjB ¡| _d S )Nz&|</%s(?=[\t\n\r\f />])z</%s(?=[\t\n\r\f />]))	Úlowerr   r   r   ÚreÚcompileÚ
IGNORECASEÚASCIIr   )r	   Úelemr!   r
   r
   r   Úset_cdata_mode™   s    


ÿ
ÿzHTMLParser.set_cdata_modec                 C   s   t | _d | _d| _d S )NT)r   r   r   r   r   r
   r
   r   Úclear_cdata_mode£   s    zHTMLParser.clear_cdata_modec                 C   s
   || _ dS )a  Enable or disable support of the CDATA sections.
        If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>".
        If disabled, "<[CDATA[" starts a bogus comments which ends with ">".

        This method is not called by default. Its purpose is to be called
        in custom handle_starttag() and handle_endtag() methods, with
        value that depends on the adjusted current node.
        See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
        for details.
        N)r   )r	   Úflagr
   r
   r   Ú_set_support_cdata¨   s    zHTMLParser._set_support_cdatac                 C   sB  | j }d}t|ƒ}||k rÚ| jrv| jsv| d|¡}|dk r | dt||d ƒ¡}|dkrpt d¡ 	||¡spqÚ|}n*| j
 	||¡}|r’| ¡ }n| jrœqÚ|}||k rÞ| jrÌ| jrÌ|  t|||… ƒ¡ n|  |||… ¡ |  ||¡}||kröqÚ|j}|d|ƒr<t ||¡r"|  |¡}	nŒ|d|ƒr:|  |¡}	nt|d|ƒrR|  |¡}	n\|d|ƒrj|  |¡}	nD|d	|ƒr‚|  |¡}	n,|d
 |k s–|rÚ|  d¡ |d
 }	nqÚ|	dk r.|sÂqÚt ||¡rÔnV|d|ƒr$|d |krú|  d¡ n&t ||¡r
n|  ||d d … ¡ n|d|ƒr||}dD ]*}
| |
|d ¡r8|t|
ƒ8 } qdq8|  ||d |… ¡ n®|d|ƒr¨| jr¨|  ||d d … ¡ n‚|||d …  ¡ dkrÚ|  ||d d … ¡ nP|d	|ƒrþ|  ||d d … ¡ n,|d|ƒr"|  ||d d … ¡ nt dƒ‚|}	|  ||	¡}q|d|ƒrât! ||¡}|r¤| "¡ dd… }|  #|¡ | $¡ }	|d|	d
 ƒs”|	d
 }	|  ||	¡}qn<d||d … v rÚ|  |||d … ¡ |  ||d ¡}qÚq|d|ƒrÌt% ||¡}|rB| "d
¡}|  &|¡ | $¡ }	|d|	d
 ƒs4|	d
 }	|  ||	¡}qt' ||¡}|rœ|rÚ| "¡ ||d … krÚ| $¡ }	|	|kr†|}	|  ||d
 ¡}qÚn.|d
 |k rÚ|  d¡ |  ||d
 ¡}nqÚqdsJ dƒ‚q|r0||k r0| jr| jr|  t|||… ƒ¡ n|  |||… ¡ |  ||¡}||d … | _ d S )Nr   ú<ú&é"   z[\t\n\r\f ;]ú</ú<!--ú<?ú<!r   é   )z--!z--ú-é   ú	<![CDATA[é   é	   ú	<!doctypezwe should not get here!z&#éÿÿÿÿú;zinteresting.search() lied)(r   Úlenr   r   ÚfindÚrfindÚmaxr#   r$   Úsearchr   Ústartr   Úhandle_datar   Z	updateposÚ
startswithÚstarttagopenÚmatchÚparse_starttagÚparse_endtagÚparse_commentÚparse_piÚparse_html_declarationÚ
endtagopenÚhandle_commentÚendswithr   Úunknown_declr"   Úhandle_declÚ	handle_piÚAssertionErrorÚcharrefÚgroupÚhandle_charrefÚendÚ	entityrefÚhandle_entityrefÚ
incomplete)r	   rU   r   ÚiÚnÚjZampposrE   rC   ÚkÚsuffixÚnamer
   r
   r   r   ¸   sâ    
ÿ









zHTMLParser.goaheadc                 C   st  | j }|||d … dks"J dƒ‚|||d … dkr@|  |¡S |||d … dkr”| jr”| d|d ¡}|d	k rvd
S |  ||d |… ¡ |d S |||d …  ¡ dkræ| d|d ¡}|d
krÈd
S |  ||d |… ¡ |d S |||d … dkrf| d|d ¡}|d	k rd
S ||d  dkrH|  ||d |d … ¡ n|  ||d |… ¡ |d S |  |¡S d S )Nr3   r2   z+unexpected call to parse_html_declaration()r5   r0   r8   r6   z]]>r   r:   r7   r9   r   r   z<![ú])	r   rH   r   r=   rN   r"   rO   rL   Úparse_bogus_comment)r	   rY   r   r[   Zgtposr
   r
   r   rJ   D  s2    

z!HTMLParser.parse_html_declarationc                 C   sp   | j }| d|¡sJ dƒ‚t ||d ¡}|sFt ||d ¡}|sFdS |rh| ¡ }|  ||d |… ¡ | ¡ S )Nr0   ú"unexpected call to parse_comment()r5   r:   )	r   rC   Úcommentcloser@   ÚcommentabruptcloserE   rA   rL   rU   )r	   rY   Úreportr   rE   r[   r
   r
   r   rH   f  s    zHTMLParser.parse_commentr   c                 C   s`   | j }|||d … dv s"J dƒ‚| d|d ¡}|dkr>dS |rX|  ||d |… ¡ |d S )Nr3   )r2   r/   ra   r   r:   r   )r   r=   rL   )r	   rY   rd   r   Úposr
   r
   r   r`   u  s    zHTMLParser.parse_bogus_commentc                 C   sd   | j }|||d … dks"J dƒ‚t ||d ¡}|s:dS | ¡ }|  ||d |… ¡ | ¡ }|S )Nr3   r1   zunexpected call to parse_pi()r:   )r   Úpicloser@   rA   rP   rU   ©r	   rY   r   rE   r[   r
   r
   r   rI     s    zHTMLParser.parse_pic                 C   s  d | _ |  |¡}|dk r|S | j}|||… | _ g }t ||d ¡}|sPJ dƒ‚| ¡ }| d¡ ¡  | _}||k r.t	 ||¡}|sŠq.| ddd¡\}	}
}|
s¨d }n\|d d… d  krÌ|dd … ksøn |d d… d  krô|dd … krn n|dd… }|rt
|ƒ}| |	 ¡ |f¡ | ¡ }ql|||…  ¡ }|d	vr¬|  ¡ \}}d
| j v rˆ|| j  d
¡ }t| j ƒ| j  d
¡ }n|t| j ƒ }|  |||… ¡ |S | d¡rÆ|  ||¡ n>|  ||¡ || jv rê|  |¡ n|| jv r| j|dd |S )Nr   r   z#unexpected call to parse_starttag()r3   r7   ú'r:   ú")r   ú/>Ú
rj   Tr    )r   Úcheck_for_whole_start_tagr   Útagfind_tolerantrE   rU   rS   r"   r   Úattrfind_tolerantr   ÚappendÚstripZgetposÚcountr<   r>   rB   rM   Úhandle_startendtagÚhandle_starttagÚCDATA_CONTENT_ELEMENTSr(   ÚRCDATA_CONTENT_ELEMENTS)r	   rY   Úendposr   ÚattrsrE   r\   ÚtagÚmÚattrnameÚrestZ	attrvaluerU   ÚlinenoÚoffsetr
   r
   r   rF     s^    

&ÿ
ÿ


ÿzHTMLParser.parse_starttagc                 C   s>   | j }t ||d ¡}|sJ ‚| ¡ }||d  dkr:dS |S )Nr   r   r:   )r   ÚlocatetagendrE   rU   rg   r
   r
   r   rl   Ä  s    z$HTMLParser.check_for_whole_start_tagc                 C   sà   | j }|||d … dks"J dƒ‚| d|d ¡dk r:dS t ||¡sp||d |d … dkrf|d S |  |¡S t ||d ¡}|sˆJ ‚| ¡ }||d  dkr¤dS t ||d ¡}|s¼J ‚| d¡ 	¡ }|  
|¡ |  ¡  |S )	Nr3   r/   zunexpected call to parse_endtagr   r   r:   r7   r   )r   r=   rK   rE   r`   r~   rU   rm   rS   r"   Úhandle_endtagr)   )r	   rY   r   rE   r[   rx   r
   r
   r   rG   Î  s&    

zHTMLParser.parse_endtagc                 C   s   |   ||¡ |  |¡ d S ©N)rs   r   ©r	   rx   rw   r
   r
   r   rr   ì  s    zHTMLParser.handle_startendtagc                 C   s   d S r€   r
   r   r
   r
   r   rs   ñ  s    zHTMLParser.handle_starttagc                 C   s   d S r€   r
   )r	   rx   r
   r
   r   r   õ  s    zHTMLParser.handle_endtagc                 C   s   d S r€   r
   ©r	   r^   r
   r
   r   rT   ù  s    zHTMLParser.handle_charrefc                 C   s   d S r€   r
   r‚   r
   r
   r   rW   ý  s    zHTMLParser.handle_entityrefc                 C   s   d S r€   r
   r   r
   r
   r   rB     s    zHTMLParser.handle_datac                 C   s   d S r€   r
   r   r
   r
   r   rL     s    zHTMLParser.handle_commentc                 C   s   d S r€   r
   )r	   Zdeclr
   r
   r   rO   	  s    zHTMLParser.handle_declc                 C   s   d S r€   r
   r   r
   r
   r   rP     s    zHTMLParser.handle_pic                 C   s   d S r€   r
   r   r
   r
   r   rN     s    zHTMLParser.unknown_decl)T)T)r   )!Ú__name__Ú
__module__Ú__qualname__Ú__doc__rt   ru   r   r   r   r   r   r   r(   r)   r+   r   rJ   rH   r`   rI   rF   rl   rG   rr   rs   r   rT   rW   rB   rL   rO   rP   rN   r
   r
   r
   r   r   Z   s>   	
	

 "

7
)r†   r#   r   Zhtmlr   Ú__all__r$   r   rX   rV   rR   rD   rK   rf   rb   rc   rm   ÚVERBOSErn   r~   Zlocatestarttagend_tolerantZ	endendtagZ
endtagfindr   r   r
   r
   r
   r   Ú<module>   s4   











õóò

