
    2\hN                     V   d Z ddlZddlZddlmZ dgZ ej        d          Z ej        d          Z ej        d          Z	 ej        d          Z
 ej        d	          Z ej        d
          Z ej        d          Z ej        d          Z ej        d          Z ej        d          Z ej        dej                  Z ej        dej                  Z ej        dej                  Z ej        d          Z ej        d          Z G d dej                  ZdS )zA parser for HTML and XHTML.    N)unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]>z--!?>z-?>z0([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*a{  
  (
    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
   )
  ([\t\n\r\f ]*=[\t\n\r\f ]*        # value indicator
    ('[^']*'                        # LITA-enclosed value
    |"[^"]*"                        # LIT-enclosed value
    |(?!['"])[^>\t\n\r\f ]*         # bare value
    )
   )?
  (?:[\t\n\r\f ]|/(?!>))*           # possibly followed by a space
a  
  [a-zA-Z][^\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:[\t\n\r\f ]*=[\t\n\r\f ]*    # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                       e Zd ZdZdZdZdddZd Zd Zd	 Z	d
Z
d ZdddZd Zd$dZd Zd Zd$dZd%dZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd  Zd! Zd" Zd# Z d
S )&r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )scriptstyle)textareatitleT)convert_charrefsc                <    || _         |                                  dS )zInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r   reset)selfr   s     5/opt/python-3.11.14/usr/lib/python3.11/html/parser.py__init__zHTMLParser.__init__s   s     !1

    c                     d| _         d| _        t          | _        d| _        d| _        d| _        t          j        	                    |            dS )z1Reset this instance.  Loses all unprocessed data. z???NT)
rawdatalasttaginteresting_normalinteresting
cdata_elem_support_cdata
_escapable_markupbase
ParserBaser   r   s    r   r   zHTMLParser.reset|   sK    -"$$T*****r   c                 N    | j         |z   | _         |                     d           dS )zFeed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   goaheadr   datas     r   feedzHTMLParser.feed   s%     |d*Qr   c                 0    |                      d           dS )zHandle any buffered data.   N)r   r   s    r   closezHTMLParser.close   s    Qr   Nc                     | j         S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr   s    r   get_starttag_textzHTMLParser.get_starttag_text   s    ##r   F	escapablec                @   |                                 | _        || _        |rB| j        s;t	          j        d| j        z  t          j        t          j        z            | _        d S t	          j        d| j        z  t          j        t          j        z            | _        d S )Nz&|</%s(?=[\t\n\r\f />])z</%s(?=[\t\n\r\f />]))	lowerr   r   r   recompile
IGNORECASEASCIIr   )r   elemr*   s      r   set_cdata_modezHTMLParser.set_cdata_mode   s    **,,# 	BT2 	B!z*Dt*V*,-*@ B  BD  "z*BT_*T*,-*@ B  BDr   c                 :    t           | _        d | _        d| _        d S )NT)r   r   r   r   r   s    r   clear_cdata_modezHTMLParser.clear_cdata_mode   s    -r   c                     || _         dS )a  Enable or disable support of the CDATA sections.
        If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>".
        If disabled, "<[CDATA[" starts a bogus comments which ends with ">".

        This method is not called by default. Its purpose is to be called
        in custom handle_starttag() and handle_endtag() methods, with
        value that depends on the adjusted current node.
        See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
        for details.
        N)r   )r   flags     r   _set_support_cdatazHTMLParser._set_support_cdata   s     #r   c                    | j         }d}t          |          }||k     rI| j        r}| j        sv|                    d|          }|dk     rY|                    dt          ||dz
                      }|dk    r*t          j        d          	                    ||          sn|}n=| j
        	                    ||          }|r|                                }n| j        rn|}||k     rV| j        r2| j        r+|                     t          |||                              n|                     |||                    |                     ||          }||k    rn|j        } |d|          rt"                              ||          r|                     |          }	n |d|          r|                     |          }	n |d|          r|                     |          }	nl |d|          r|                     |          }	nJ |d	|          r|                     |          }	n(|d
z   |k     s|r|                     d           |d
z   }	nn|	dk     r|snt"                              ||          rn |d|          r_|dz   |k    r|                     d           nt0                              ||          rnd|                     ||dz   d                     nB |d|          rU|}dD ]/}
|                    |
|dz             r|t          |
          z  } n0|                     ||dz   |                    n |d|          r(| j        r!|                     ||dz   d                     n|||dz                                            dk    r!|                     ||dz   d                     ni |d	|          r!|                     ||dz   d                     n< |d|          r!|                     ||dz   d                     ntA          d          |}	|                     ||	          }n# |d|          rtB                              ||          }|rq|"                                dd         }| #                    |           |$                                }	 |d|	d
z
            s|	d
z
  }	|                     ||	          }d||d          v r9|                     |||dz                       |                     ||dz             }nI |d|          r5tJ                              ||          }|rj|"                    d
          }| &                    |           |$                                }	 |d|	d
z
            s|	d
z
  }	|                     ||	          }tN                              ||          }|rX|rU|"                                ||d          k    r5|$                                }	|	|k    r|}	|                     ||d
z             }n@|d
z   |k     r/|                     d           |                     ||d
z             }nn||k     I|rr||k     rl| j        r2| j        r+|                     t          |||                              n|                     |||                    |                     ||          }||d          | _         d S )Nr   <&"   z[\t\n\r\f ;]z</<!--z<?z<!r$      )z--!z---   	<![CDATA[   	   	<!doctypezwe should not get here!z&#;)(r   lenr   r   findrfindmaxr-   r.   searchr   startr   handle_datar   	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_piparse_html_declaration
endtagopenhandle_commentendswithr   unknown_declr,   handle_decl	handle_piAssertionErrorcharrefgrouphandle_charrefend	entityrefhandle_entityref
incomplete)r   r`   r   injampposrP   rN   ksuffixnames               r   r   zHTMLParser.goahead   s   ,LL!ee$ T_ LLa((q55 %]]3Aqt==F!J77>>wOO $A(//;; AA A1uu( 3T_ 3$$Xgacl%;%;<<<<$$WQqS\222q!$$AAvvu +Jz#q!! \6%%gq11 ++A..AAZa(( ))!,,AAZ** 
**1--AAZa(( a((AAZa(( 33A66AA!eq[[C[$$S)))AAAq55 #))'155 H#D!,, Hq5A:: ,,T2222'--gq99 ?  !//!>>>>#FA.. H&8 & &F&//!<< & !S[[ 0 %& ++GAaCEN;;;;#K33 
H8K 
H))'!A#$$-8888 1Q3--//;>>((17777#D!,, H++GAaCDDM::::#D!,, Hwqstt}5555,-FGGGANN1a((D!$$ +6gq11  ;;==2.D''---		A%:c1Q3// "Eq!,,Agabbk))((1Q3888 NN1ac22C## 6!33  ;;q>>D))$///		A%:c1Q3// "Eq!,,A"((!44  5u{{}};;!IIKK66 !A NN1a!e44!eq[[ $$S)))q!a%00AAs !eez  	%1q55$ / /  '!A#,!7!78888  1...q!$$Aqrr{r   c                 (   | j         }|||dz            dk    r|                     |          S |||dz            dk    rM| j        rF|                    d|dz             }|dk     rdS |                     ||dz   |                    |dz   S |||dz                                            d	k    rF|                    d
|dz             }|dk    rdS |                     ||dz   |                    |dz   S |||dz            dk    ry|                    d
|dz             }|dk     rdS ||dz
           dk    r$|                     ||dz   |dz
                      n |                     ||dz   |                    |dz   S |                     |          S )Nr?   r<   rB   r@   z]]>r   rD   rA   rC   r   r=   r$   z<![])	r   rS   r   rG   rY   r,   rZ   rW   parse_bogus_comment)r   rd   r   rf   gtposs        r   rU   z!HTMLParser.parse_html_declarationD  s   , 1QqS5>V##%%a(((QqsU^{**t/B*UAaC((A1uurgac1fo...q5LQqsU^!!##{22LLac**E{{rWQqSY/0007NQqsU^u$$S!A#&&A1uurqs|s""!!'!A#qs("34444##GAaCFO444q5L++A...r   c                 (   | j         }t                              ||dz             }|s"t                              ||dz             }|sdS |r4|                                }|                     ||dz   |                    |                                S )Nr?   rD   )r   commentcloserJ   commentabruptcloserP   rK   rW   r`   )r   rd   reportr   rP   rf   s         r   rS   zHTMLParser.parse_commentf  s    ,##GQqS11 	&,,Wac::E r 	1A!Q000yy{{r   r$   c                     | j         }|                    d|dz             }|dk    rdS |r |                     ||dz   |                    |dz   S )Nr   r=   rD   r$   )r   rG   rW   )r   rd   rr   r   poss        r   rm   zHTMLParser.parse_bogus_commentu  sb    , ll3!$$"992 	2!C 0111Qwr   c                     | j         }t                              ||dz             }|sdS |                                }|                     ||dz   |                    |                                }|S )Nr=   rD   )r   picloserJ   rK   r[   r`   r   rd   r   rP   rf   s        r   rT   zHTMLParser.parse_pi  sj    ,w!,, 	2KKMMwqsAv'''IIKKr   c                    d | _         |                     |          }|dk     r|S | j        }|||         | _         g }t                              ||dz             }|                                }|                    d                                          x| _        }||k     rt                              ||          }|sn|                    ddd          \  }	}
}|
sd }nI|d d         dcxk    r|dd          k    s"n |d d         dcxk    r|dd          k    rn n
|dd         }|rt          |          }|                    |	                                |f           |                                }||k     |||                                         }|dvr|                     |||                    |S |                    d	          r|                     ||           nU|                     ||           || j        v r|                     |           n || j        v r|                     |d
           |S )Nr   r$   r=   rA   'rD   ")r   />r{   Tr)   )r'   check_for_whole_start_tagr   tagfind_tolerantrP   r`   r^   r,   r   attrfind_tolerantr   appendstriprL   rX   handle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr2   RCDATA_CONTENT_ELEMENTS)r   rd   endposr   attrsrP   rh   tagmattrnamerest	attrvaluer`   s                r   rQ   zHTMLParser.parse_starttag  s     $//22A::M,&qx0  &&w!44IIKK"[[^^11333s&jj!''33A ()1a(8(8%HdI , 		2A2$8888)BCC.88882A2#777723377777%adO	 0$Y//	LL(..**I6777A &jj ah%%''k!!WQvX.///M<< 	9##C////  e,,,d111##C((((444##C4#888r   c                     | j         }t                              ||dz             }|                                }||dz
           dk    rdS |S )Nr$   r   rD   )r   locatetagendrP   r`   rw   s        r   r|   z$HTMLParser.check_for_whole_start_tag  sL    ,""7AaC00IIKK1Q3<32r   c                 F   | j         }|                    d|dz             dk     rdS t                              ||          s.||dz   |dz            dk    r|dz   S |                     |          S t
                              ||dz             }|                                }||dz
           dk    rdS t                              ||dz             }|                    d          	                                }| 
                    |           |                                  |S )Nr   r=   r   rD   rA   r$   )r   rG   rV   rP   rm   r   r`   r}   r^   r,   handle_endtagr4   )r   rd   r   rP   rf   r   s         r   rR   zHTMLParser.parse_endtag  s    ,<<QqS!!A%%2++ 	3qs1Q3w3&&s
//222""7AaC00IIKK1Q3<32 !&&w!44kk!nn""$$3r   c                 \    |                      ||           |                     |           d S N)r   r   r   r   r   s      r   r   zHTMLParser.handle_startendtag  s2    S%(((3r   c                     d S r    r   s      r   r   zHTMLParser.handle_starttag      r   c                     d S r   r   )r   r   s     r   r   zHTMLParser.handle_endtag  r   r   c                     d S r   r   r   rj   s     r   r_   zHTMLParser.handle_charref  r   r   c                     d S r   r   r   s     r   rb   zHTMLParser.handle_entityref  r   r   c                     d S r   r   r    s     r   rL   zHTMLParser.handle_data  r   r   c                     d S r   r   r    s     r   rW   zHTMLParser.handle_comment  r   r   c                     d S r   r   )r   decls     r   rZ   zHTMLParser.handle_decl  r   r   c                     d S r   r   r    s     r   r[   zHTMLParser.handle_pi  r   r   c                     d S r   r   r    s     r   rY   zHTMLParser.unknown_decl	  r   r   )T)r$   )!__name__
__module____qualname____doc__r   r   r   r   r"   r%   r'   r(   r2   r4   r7   r   rU   rS   rm   rT   rQ   r|   rR   r   r   r   r_   rb   rL   rW   rZ   r[   rY   r   r   r   r   r   Z   s        * 13+/     + + +     O$ $ $ 16 B B B B B  
# # # # G# G# G#X/ / /D   	 	 	 		 	 	, , ,`    <     
                    r   )r   r-   r   htmlr   __all__r.   r   rc   ra   r]   rO   rV   rv   rp   rq   r}   VERBOSEr~   r   locatestarttagend_tolerant	endendtag
endtagfindr   r   r   r   r   <module>r      s   " " 
			           .  RZ'' RZ%%
BJ>??	
"*@
A
Arz+&&RZ%%

"*S//rz(##RZ''  2:QRR BJ   Z   rz  Z   (RZ ) Z   BJsOO	RZ>??
p p p p p' p p p p pr   