
    6xiC                       d Z ddlmZ ddlZddlZddlZddlmZm	Z	 erddl
mZ  ej        d          Zej                            d          Zej                            e          Zej                            e           eej        d<    ej        d	          e_         ej        d
          e_         ej        d          e_         ej        d          e_        ej        e_         ej        dej                  e_         ej        dej                  e_         ej        d          Z G d dej                  Z e e_         G d dej                  Z!dS )a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
    )annotationsN)TYPE_CHECKINGSequence)Markdownz--!?>zhtml.parser
htmlparserz<[a-zA-Z]|</>z</[a-zA-Z]?z\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
a  
  [a-zA-Z][^`\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^`\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:=                            # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
z^([ ]*\n){2}c                  .     e Zd ZdZ fdZd fdZ xZS )_HTMLParserz"Handle special start and end tags.c                J   | j         ||dz            }t          |d                   }t          |          dk     sd|cxk    rdk    s;n d|cxk    rdk    s,n |                     | j         ||dz                       |dz   S t	                                          |          S )N   A   Z   a   z      )rawdataordlenhandle_datasuperparse_endtag)selfistartc	__class__s       Z/home/ubuntu/.hermes/hermes-agent/venv/lib/python3.11/site-packages/markdown/htmlparser.pyr   z_HTMLParser.parse_endtagc   s    QqsU#b	NNu::>>"----R----2>>>>c>>>>T\!AE'2333q5Lww##A&&&    r   intreturnc                    | j         ||dz            dk    r*|                     | j         ||dz                       |dz   S t                                          |          S )Nr   </>)r   r   r   parse_starttagr   r   r   s     r   r#   z_HTMLParser.parse_starttagk   sa    <!a% E))T\!AE'2333q5Lww%%a(((r   r   r   r    r   )__name__
__module____qualname____doc__r   r#   __classcell__r   s   @r   r	   r	   `   s\        ,,' ' ' ' ') ) ) ) ) ) ) ) ) )r   r	   c                      e Zd ZU dZd, fdZ fdZ fdZed-d	            Zd.dZ	d/dZ
d0dZd1dZd2dZd3dZd1dZd4dZd4dZd2dZd2dZd2dZd2dZd5 fd Zd6d"Zd5 fd#Zd7d8 fd&Zd'Zd(ed)<   d9d*Zd5d+Z xZS ):HTMLExtractorz
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    mdr   c                    d|vrd|d<   t          dg          | _        dg| _         t                      j        |i | || _        d S )Nconvert_charrefsFhrr   )set
empty_tagslineno_start_cacher   __init__r.   )r   r.   argskwargsr   s       r   r5   zHTMLExtractor.__init__   s]    V++).F%& tf++#$# 	$)&)))r   c                    d| _         d| _        g | _        g | _        g | _        dg| _        t                                                       dS )z1Reset this instance.  Loses all unprocessed data.Fr   N)inrawintailstack_cachecleandocr4   r   resetr   r   s    r   r>   zHTMLExtractor.reset   sE    
 "
!##%#$#r   c                   t                                                       t          | j                  r[| j        r:| j        s3|                     t                              | j                             n|                     | j                   t          | j	                  rX| j
                            | j        j                            d                    | j	                                       g | _	        dS dS )zHandle any buffered data. N)r   closer   r   r0   
cdata_elemr   r   unescaper<   r=   appendr.   	htmlStashstorejoinr?   s    r   rB   zHTMLExtractor.close   s    t| 	/ $ /T_ /  !4!4T\!B!BCCCC  ...t{ 	M  !2!8!89M9M!N!NOOODKKK	 	r   r    r   c                J   t          t          | j                  dz
  | j        dz
            D ]a}| j        |         }| j                            d|          }|dk    rt          | j                  }| j                            |dz              b| j        | j        dz
           S )zHReturns char index in `self.rawdata` for the start of the current line.    
r   )ranger   r4   linenor   findrE   )r   iilast_line_start_poslf_poss       r   line_offsetzHTMLExtractor.line_offset   s     D344Q6AFF 	5 	5B"&"9""=\&&t-@AAF||T\**#**6!84444&t{1}55r   boolc                    | j         dk    rdS | j         dk    rdS | j        | j        | j        | j         z                                            dk    S )z
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   Tr   FrA   )offsetr   rR   stripr   s    r   at_line_startzHTMLExtractor.at_line_start   sW     ;!4;??5|D,T-=-KKLRRTTXZZZr   tagstrc                    | j         | j        z   }t          j                            | j        |          }|r!| j        ||                                         S d                    |          S )z
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>)rR   rU   r   	endendtagsearchr   endformat)r   rY   r   ms       r   get_endtag_textzHTMLExtractor.get_endtag_text   sb      4;. ''e<< 	'<aeegg.. >>#&&&r   attrsSequence[tuple[str, str]]c                   || j         v r|                     ||           d S | j                            |          rC| j        s|                                 r(| j        s!d| _        | j                            d           | 	                                }| j        r6| j
                            |           | j                            |           d S | j                            |           || j        v r|                                  d S d S )NTrK   )r3   handle_startendtagr.   is_block_levelr:   rX   r9   r=   rE   get_starttag_textr;   r<   CDATA_CONTENT_ELEMENTSclear_cdata_mode)r   rY   rb   texts       r   handle_starttagzHTMLExtractor.handle_starttag   s   $/!!##C///F7!!#&& 	'DK 	'D<N<N<P<P 	'Y]Yc 	'DJM  &&&%%'': 	(Jc"""Kt$$$$$M  &&&d111%%''''' 21r   c                   |                      |          }| j        rH| j                            |           || j        v r,| j        r%| j                                        |k    rn| j        %t          | j                  dk    rt                              | j	        | j
        | j        z   t          |          z   d                    r| j                            d           nd| _        d| _        | j                            | j        j                            d                    | j                                       | j                            d           g | _        d S d S | j                            |           d S )Nr   rK   TFrA   

)ra   r9   r<   rE   r;   popr   blank_line_rematchr   rR   rU   r:   r=   r.   rF   rG   rH   )r   rY   rj   s      r   handle_endtagzHTMLExtractor.handle_endtag   sj   ##C((: 	'Kt$$$dj  j z~~''3.. j  4:!## &&t|D4Dt{4RUXY]U^U^4^4_4_'`aa 'K&&t,,,, #'DK"
$$TW%6%<%<RWWT[=Q=Q%R%RSSS$$V,,,  $# M  &&&&&r   datac                    | j         rd|v rd| _         | j        r| j                            |           d S | j                            |           d S )NrK   F)r:   r9   r<   rE   r=   r   rr   s     r   r   zHTMLExtractor.handle_data  s]    ; 	 44<<DK: 	'Kt$$$$$M  &&&&&r   is_blockc                   | j         s| j        r| j                            |           dS |                                 r |rt
                              | j        | j        | j	        z   t          |          z   d                   r|dz  }nd| _        | j        r| j        d         nd}|                    d          s/|                    d          r| j                            d           | j                            | j        j                            |                     | j                            d           dS | j                            |           dS )z Handle empty tags (`<data>`). NrK   Tr   rA   rm   )r9   r:   r<   rE   rX   ro   rp   r   rR   rU   r   r=   endswithr.   rF   rG   )r   rr   ru   items       r   handle_empty_tagzHTMLExtractor.handle_empty_tag	  sR   : 	' 	'Kt$$$$$!! 	'h 	'""4<0@4;0NQTUYQZQZ0Z0[0[#\]] # #(,=4=$$2D==(( +T]]4-@-@ +$$T***M  !2!8!8!>!>???M  (((((M  &&&&&r   c                    |                      |                                 | j                            |                     d S )Nru   )ry   rg   r.   rf   )r   rY   rb   s      r   re   z HTMLExtractor.handle_startendtag   s>    d4466AWAWX[A\A\]]]]]r   namec                Z    |                      d                    |          d           d S )Nz&#{};Fr{   ry   r_   r   r|   s     r   handle_charrefzHTMLExtractor.handle_charref#  s-    gnnT22UCCCCCr   c                Z    |                      d                    |          d           d S )Nz&{};Fr{   r~   r   s     r   handle_entityrefzHTMLExtractor.handle_entityref&  s-    fmmD11EBBBBBr   c                Z    |                      d                    |          d           d S )Nz	<!--{}-->Tr{   r~   rt   s     r   handle_commentzHTMLExtractor.handle_comment)  s/    k0066FFFFFr   c                Z    |                      d                    |          d           d S )Nz<!{}>Tr{   r~   rt   s     r   handle_declzHTMLExtractor.handle_decl-  s-    gnnT22TBBBBBr   c                Z    |                      d                    |          d           d S )Nz<?{}?>Tr{   r~   rt   s     r   	handle_pizHTMLExtractor.handle_pi0  s-    hood33dCCCCCr   c                    |                     d          rdnd}|                     d                    ||          d           d S )NzCDATA[z]]>z]>z<![{}{}Tr{   )
startswithry   r_   )r   rr   r^   s      r   unknown_declzHTMLExtractor.unknown_decl3  sK    x00:eedi..tS99DIIIIIr   r   c                    |                                  s| j        r!t                                          |          S |                     d           |dz   S )Nz<?r   )rX   r:   r   parse_pir   r$   s     r   r   zHTMLExtractor.parse_pi7  sW     	'4; 	'77##A&&& 	1ur   Tc                X   | j         }|                    d|          s
J d            t                              ||dz             }|s|                     d           |dz   S |r4|                                }|                     ||dz   |                    |                                S )Nz<!--z"unexpected call to parse_comment()   <rJ   )r   r   commentcloser]   r   r   r   r^   )r   r   reportr   rp   js         r   parse_commentzHTMLExtractor.parse_commentA  s    ,!!&!,,RR.RRR,##GQqS11 	S!!!q5L 	1A!Q000yy{{r   c                   |                                  s| j        r| j        ||dz            dk    r]| j        ||dz            dk    sG|                     |          }|dk    r*|                     | j        ||dz                       |dz   S |S t                                          |          S |                     d           |dz   S )	Nr   z<![	   z	<![CDATA[r   rJ   z<!r   )rX   r:   r   parse_bogus_commentr   r   parse_html_declaration)r   r   resultr   s      r   r   z$HTMLExtractor.parse_html_declarationM  s     		54; 		5|AacE"e++DL1Q34G;4V4V 11!44R<<$$T\!AE'%:;;;q5L7711!444 	1ur   r   r   c                    t                                          ||          }|dk    rdS |                     | j        ||         d           |S )Nr   Fr{   )r   r   ry   r   )r   r   r   posr   s       r   r   z!HTMLExtractor.parse_bogus_comment]  sU     gg))!V44"992dl1S51EBBB
r   Nz
str | None_HTMLExtractor__starttag_textc                    | j         S )z)Return full source of start tag: `<...>`.)r   rW   s    r   rg   zHTMLExtractor.get_starttag_textl  s    ##r   c                X   | j         ||dz            dk    r*|                     | j         ||dz                       |dz   S d | _        |                     |          }|dk     r*|                     | j         ||dz                       |dz   S | j         }|||         | _        g }t          j                            ||dz             }|s
J d            |                                }|                    d          	                                x| _
        }||k     rt          j                            ||          }|sn|                    ddd          \  }	}
}|
sd }nI|d d         dcxk    r|dd          k    s"n |d d         d	cxk    r|dd          k    rn n
|dd         }|rt                              |          }|                    |		                                |f           |                                }||k     |||                                         }|d
vr|                                 \  }}d| j        v rM|| j                            d          z   }t#          | j                  | j                            d          z
  }n|t#          | j                  z   }|                     |||                    |S |                    d          r|                     ||           n4|| j        v r|                     |           |                     ||           |S )Nr   r"   r   rJ   z#unexpected call to parse_starttag()r   'r   ")>/>rK   r   )r   r   r   check_for_whole_start_tagr   tagfind_tolerantrp   r^   grouplowerlasttagattrfind_tolerantrD   rE   rV   getposcountr   rfindrw   re   rh   set_cdata_moderk   )r   r   endposr   rb   rp   krY   r`   attrnamerest	attrvaluer^   rM   rU   s                  r   r#   zHTMLExtractor.parse_starttagp  s`   <!a% E))T\!AE'2333q5L#//22A::T\!AE'2333q5L,&qx0 +11'1Q3??;;;;;uIIKK"[[^^11333s&jj,227A>>A ()1a(8(8%HdI , 		2A2$8888)BCC.88882A2#777723377777%adO	 ;&//	::	LL(..**I6777A &jj ah%%''k!!![[]]NFFt+++$"6"<"<T"B"BBT122/55d;;<  #d&:";";;WQvX.///M<< 	-##C//// d111##C(((  e,,,r   )r.   r   )r    r   )r    rS   )rY   rZ   r    rZ   )rY   rZ   rb   rc   )rY   rZ   )rr   rZ   )rr   rZ   ru   rS   )r|   rZ   r%   )T)r   )r   r   r   r   r    r   )r    rZ   )r&   r'   r(   r)   r5   r>   rB   propertyrR   rX   ra   rk   rq   r   ry   re   r   r   r   r   r   r   r   r   r   r   r   __annotations__rg   r#   r*   r+   s   @r   r-   r-   x   sm              	 	 	 	 	     
6 
6 
6 X
6[ [ [ [' ' ' '( ( ( (*' ' ' '6' ' ' '' ' ' '.^ ^ ^ ^D D D DC C C CG G G GC C C CD D D DJ J J J     
 
 
 
             #'O&&&&$ $ $ $6 6 6 6 6 6 6 6r   r-   )"r)   
__future__r   reimportlib.util	importlibsystypingr   r   markdownr   compiler   util	find_specspecmodule_from_specr   loaderexec_modulemodulesstarttagopen
endtagopenpiclose	entityref
incompleteVERBOSElocatestarttagend_tolerantlocatetagendro   
HTMLParserr	   r-    r   r   <module>r      s  (  # " " " " " 				     



 * * * * * * * * "!!!!!! rz(## ~..^,,T22
   
 # # #&L  %"*_55
 "
=11
   RZ''
 !rz"?@@
  #,
 (2
 4 Z) )
 % %"* & Z 
 " 
?++) ) ) ) )*' ) ) )* $
 n n n n nJ) n n n n nr   