
    5bhN                        d Z ddlZddlZddlmZ dgZ ej                  d      Z ej                  d      Z ej                  d      Z	 ej                  d      Z
 ej                  d	      Z ej                  d
      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  dej"                        Z ej                  dej"                        Z ej                  dej"                        Z ej                  d      Z ej                  d      Z G d dej.                        Zy)zA parser for HTML and XHTML.    N)unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]>z--!?>z-?>z0([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*a{  
  (
    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
   )
  ([\t\n\r\f ]*=[\t\n\r\f ]*        # value indicator
    ('[^']*'                        # LITA-enclosed value
    |"[^"]*"                        # LIT-enclosed value
    |(?!['"])[^>\t\n\r\f ]*         # bare value
    )
   )?
  (?:[\t\n\r\f ]|/(?!>))*           # possibly followed by a space
a  
  [a-zA-Z][^\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:[\t\n\r\f ]*=[\t\n\r\f ]*    # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                        e Zd ZdZdZdZdd fd
Z fdZd Zd	 Z	d
Z
d ZdddZd Zd#dZd Zd Zd#dZd$dZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd  Zd! Zd" Z  xZ!S )%r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )scriptstyle)textareatitleT)convert_charrefsc                P    t         |           || _        | j                          y)zInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)super__init__r   reset)selfr   	__class__s     5/opt/python-3.12.12/usr/lib/python3.12/html/parser.pyr   zHTMLParser.__init__s   s!     	 0

    c                 ~    d| _         d| _        t        | _        d| _        d| _        d| _        t        | !          y)z1Reset this instance.  Loses all unprocessed data. z???NT)	rawdatalasttaginteresting_normalinteresting
cdata_elem_support_cdata
_escapabler   r   )r   r   s    r   r   zHTMLParser.reset}   s8    -"r   c                 N    | j                   |z   | _         | j                  d       y)zFeed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   goaheadr   datas     r   feedzHTMLParser.feed   s     ||d*Qr   c                 &    | j                  d       y)zHandle any buffered data.   N)r   r   s    r   closezHTMLParser.close   s    Qr   Nc                     | j                   S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr$   s    r   get_starttag_textzHTMLParser.get_starttag_text   s    ###r   F	escapablec                v   |j                         | _        || _        |rT| j                  sHt	        j
                  d| j                  z  t        j                  t        j                  z        | _        y t	        j
                  d| j                  z  t        j                  t        j                  z        | _        y )Nz&|</%s(?=[\t\n\r\f />])z</%s(?=[\t\n\r\f />]))	lowerr   r   r   recompile
IGNORECASEASCIIr   )r   elemr*   s      r   set_cdata_modezHTMLParser.set_cdata_mode   s    **,#T22!zz*Dt*V*,--*@ BD  "zz*BT__*T*,--*@ BDr   c                 6    t         | _        d | _        d| _        y )NT)r   r   r   r   r$   s    r   clear_cdata_modezHTMLParser.clear_cdata_mode   s    -r   c                     || _         y)a  Enable or disable support of the CDATA sections.
        If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>".
        If disabled, "<[CDATA[" starts a bogus comments which ends with ">".

        This method is not called by default. Its purpose is to be called
        in custom handle_starttag() and handle_endtag() methods, with
        value that depends on the adjusted current node.
        See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
        for details.
        N)r   )r   flags     r   _set_support_cdatazHTMLParser._set_support_cdata   s     #r   c                    | j                   }d}t        |      }||k  r9| j                  rq| j                  se|j	                  d|      }|dk  r|j                  dt        ||dz
              }|dk\  r't        j                  d      j                  ||      sn|}n?| j                  j                  ||      }|r|j                         }n| j                  rn|}||k  rJ| j                  r*| j                  r| j                  t        |||              n| j                  |||        | j                  ||      }||k(  rn|j                   } |d|      r=t"        j%                  ||      r| j'                  |      }	n |d|      r| j)                  |      }	nt |d|      r| j+                  |      }	nY |d|      r| j-                  |      }	n> |d	|      r| j/                  |      }	n#|d
z   |k  s|r| j                  d       |d
z   }	nnH|	dk  rl|sn>t"        j%                  ||      rnN |d|      rK|dz   |k(  r| j                  d       n*t0        j%                  ||      rn| j3                  ||dz   d         n |d|      rF|}dD ]'  }
|j5                  |
|dz         s|t        |
      z  } n | j3                  ||dz   |        n |d|      r$| j6                  r| j9                  ||dz   d         n~|||dz    j;                         dk(  r| j=                  ||dz   d         nM |d	|      r| j3                  ||dz   d         n, |d|      r| j?                  ||dz   d         ntA        d      |}	| j                  ||	      }n |d|      rtB        j%                  ||      }|rY|jE                         dd }| jG                  |       |jI                         }	 |d|	d
z
        s|	d
z
  }	| j                  ||	      }d||d  v r,| j                  |||dz           | j                  ||dz         }n |d|      rtJ        j%                  ||      }|rW|jE                  d
      }| jM                  |       |jI                         }	 |d|	d
z
        s|	d
z
  }	| j                  ||	      }tN        j%                  ||      }|rE|rB|jE                         ||d  k(  r,|jI                         }	|	|k  r|}	| j                  ||d
z         }n>|d
z   |k  r'| j                  d       | j                  ||d
z         }nnJ d       ||k  r9|ra||k  r\| j                  r*| j                  r| j                  t        |||              n| j                  |||        | j                  ||      }||d  | _         y )Nr   <&"   z[\t\n\r\f ;]</<!--<?<!r#      )z--!z---   	<![CDATA[   	   	<!doctypezwe should not get here!z&#;zinteresting.search() lied)(r   lenr   r   findrfindmaxr-   r.   searchr   startr   handle_datar   	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_piparse_html_declaration
endtagopenhandle_commentendswithr   unknown_declr,   handle_decl	handle_piAssertionErrorcharrefgrouphandle_charrefend	entityrefhandle_entityref
incomplete)r   rc   r   injampposrS   rQ   ksuffixnames               r   r   zHTMLParser.goahead   s   ,,L!e$$T__LLa(q5 %]]3Aqt=F!JJ7>>wOA((//;AA1u((T__$$Xgal%;<$$WQq\2q!$AAvu ++J#q!%%gq1++A.Aa())!,A***1-Aa(a(Aa(33A6A!eq[C$$S)AAq5#))'15#D!,q5A: ,,T2'--gq9  !//!>#FA.&8F&//!< !S[ 0 % '9 ++GAaCN;#K38K8K))'!A#$-8 1Q3--/;>((17#D!,++GAaCDM:#D!,wqst}5,-FGGANN1a(D!$gq1 ;;=2.D''-		A%c1Q3/Eq!,Agabk)((1Q38 NN1ac2C#!3 ;;q>D))$/		A%c1Q3/Eq!,A"((!4u{{};!IIK6 !A NN1a!e4!eq[ $$S)q!a%0A555qw !ez 1q5$$  '!A,!78  1.q!$Aqr{r   c                    | j                   }|||dz    dk(  sJ d       |||dz    dk(  r| j                  |      S |||dz    dk(  rC| j                  r7|j                  d|dz         }|d	k  ry
| j	                  ||dz   |        |dz   S |||dz    j                         dk(  r7|j                  d|dz         }|d
k(  ry
| j                  ||dz   |        |dz   S |||dz    dk(  ra|j                  d|dz         }|d	k  ry
||dz
     dk(  r| j	                  ||dz   |dz
          |dz   S | j                  ||dz   |        |dz   S | j                  |      S )Nr@   r?   z+unexpected call to parse_html_declaration()rB   r=   rE   rC   z]]>r   rG   rD   rF   r   r#   z<![])	r   rV   r   rJ   r\   r,   r]   rZ   parse_bogus_comment)r   rg   r   ri   gtposs        r   rX   z!HTMLParser.parse_html_declarationE  s   ,,q1~% 	D )C 	D%1QqS>V#%%a((Qqs^{*t/B/BUAaC(A1ugac1o.q5LQqs^!!#{2LLac*E{WQqS/07NQqs^u$S!A#&A1uqs|s"!!'!A#qs"34 q5L ##GAaCO4q5L++A..r   c                 ,   | j                   }|j                  d|      sJ d       t        j                  ||dz         }|st        j                  ||dz         }|sy|r'|j                         }| j                  ||dz   |        |j                         S )Nr=   "unexpected call to parse_comment()rB   rG   )	r   rQ   commentcloserM   commentabruptcloserS   rN   rZ   rc   )r   rg   reportr   rS   ri   s         r   rV   zHTMLParser.parse_commentg  s    ,,!!&!,R.RR,##GQqS1&,,Wac:EA!Q0yy{r   c                     | j                   }|||dz    dv sJ d       |j                  d|dz         }|dk(  ry|r| j                  ||dz   |        |dz   S )Nr@   )r?   r<   rs   r   rG   r#   )r   rJ   rZ   )r   rg   rv   r   poss        r   rp   zHTMLParser.parse_bogus_commentv  su    ,,q1~- 	C 1B 	C-ll3!$"9!C 01Qwr   c                     | j                   }|||dz    dk(  sJ d       t        j                  ||dz         }|sy|j                         }| j	                  ||dz   |        |j                         }|S )Nr@   r>   zunexpected call to parse_pi()rG   )r   picloserM   rN   r^   rc   r   rg   r   rS   ri   s        r   rW   zHTMLParser.parse_pi  st    ,,q1~%F'FF%w!,KKMwqsA'IIKr   c                    d | _         | j                  |      }|dk  r|S | j                  }||| | _         g }t        j	                  ||dz         }|sJ d       |j                         }|j                  d      j                         x| _        }||k  rt        j	                  ||      }|sn|j                  ddd      \  }	}
}|
sd }n,|d d dcxk(  r|dd  k(  sn |d d dcxk(  r|dd  k(  rn n|dd }|rt        |      }|j                  |	j                         |f       |j                         }||k  r||| j                         }|d	vr| j                  |||        |S |j                  d
      r| j                  ||       |S | j!                  ||       || j"                  v r| j%                  |       |S || j&                  v r| j%                  |d       |S )Nr   r#   z#unexpected call to parse_starttag()r@   rD   'rG   ")r   />r   Tr)   )r'   check_for_whole_start_tagr   tagfind_tolerantrS   rc   ra   r,   r   attrfind_tolerantr   appendstriprO   r[   handle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr2   RCDATA_CONTENT_ELEMENTS)r   rg   endposr   attrsrS   rk   tagmattrnamerest	attrvaluerc   s                r   rT   zHTMLParser.parse_starttag  s     $//2A:M,,&q0  &&w!4;;;uIIK"[[^1133s&j!''3A()1a(8%HdI 	2A$8)BC.82A#7237%aO	$Y/	LL(..*I67A &j a%%'k!WQv./M<<##C/    e,d111##C(  444##C4#8r   c                     | j                   }t        j                  ||dz         }|sJ |j                         }||dz
     dk7  ry|S )Nr#   r   rG   )r   locatetagendrS   rc   r{   s        r   r   z$HTMLParser.check_for_whole_start_tag  sK    ,,""7AaC0uIIK1Q3<3r   c                    | j                   }|||dz    dk(  sJ d       |j                  d|dz         dk  ryt        j                  ||      s$||dz   |dz    dk(  r|dz   S | j	                  |      S t
        j                  ||dz         }|sJ |j                         }||dz
     dk7  ryt        j                  ||dz         }|sJ |j                  d      j                         }| j                  |       | j                          |S )	Nr@   r<   zunexpected call to parse_endtagr   r   rG   rD   r#   )r   rJ   rY   rS   rp   r   rc   r   ra   r,   handle_endtagr4   )r   rg   r   rS   ri   r   s         r   rU   zHTMLParser.parse_endtag  s    ,,q1~%H'HH%<<QqS!A%+qs1Q33&s
//22""7AaC0uIIK1Q3<3 !&&w!4ukk!n""$3r   c                 J    | j                  ||       | j                  |       y N)r   r   r   r   r   s      r   r   zHTMLParser.handle_startendtag  s     S%(3r   c                      y r    r   s      r   r   zHTMLParser.handle_starttag      r   c                      y r   r   )r   r   s     r   r   zHTMLParser.handle_endtag  r   r   c                      y r   r   r   rm   s     r   rb   zHTMLParser.handle_charref  r   r   c                      y r   r   r   s     r   re   zHTMLParser.handle_entityref  r   r   c                      y r   r   r   s     r   rO   zHTMLParser.handle_data  r   r   c                      y r   r   r   s     r   rZ   zHTMLParser.handle_comment  r   r   c                      y r   r   )r   decls     r   r]   zHTMLParser.handle_decl  r   r   c                      y r   r   r   s     r   r^   zHTMLParser.handle_pi  r   r   c                      y r   r   r   s     r   r\   zHTMLParser.unknown_decl
  r   r   )T)r#   )"__name__
__module____qualname____doc__r   r   r   r   r!   r%   r'   r(   r2   r4   r7   r   rX   rV   rp   rW   rT   r   rU   r   r   r   rb   re   rO   rZ   r]   r^   r\   __classcell__)r   s   @r   r   r   Z   s    * 13+/  O$ 16 B
# G#X/D		,`< 
r   )r   r-   _markupbasehtmlr   __all__r.   r   rf   rd   r`   rR   rY   rz   rt   ru   r   VERBOSEr   r   locatestarttagend_tolerant	endendtag
endtagfind
ParserBaser   r   r   r   <module>r      s[   " 
   .  RZZ' RZZ%
BJJ>?	
"**@
Arzz+&RZZ%

"**S/rzz(#RZZ'  2::QR BJJ   ZZ  rzz  ZZ  (RZZ ) ZZ  BJJsO	RZZ>?
q'' qr   