hNVdZddlZddlZddlmZdgZejdZejdZejdZ ejdZ ejd Z ejd Z ejd Z ejd Zejd ZejdZejdejZejdejZejdejZejd ZejdZGddejZdS)zA parser for HTML and XHTML.N)unescape HTMLParserz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]z z--!?>z-?>z0([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*a{ ( (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name ) ([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator ('[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\t\n\r\f ]* # bare value ) )? (?:[\t\n\r\f ]|/(?!>))* # possibly followed by a space a [a-zA-Z][^\t\n\r\f />]* # tag name [\t\n\r\f /]* # optional whitespace before attribute name (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name (?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\t\n\r\f ]* # bare value ) )? [\t\n\r\f /]* # possibly followed by a space )* >? aF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) \s* # possibly followed by a space )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#ceZdZdZdZdZdddZdZdZd Z d Z d Z d d dZ dZ d$dZdZdZd$dZd%dZdZdZdZdZdZdZdZdZdZdZd Zd!Zd"Zd#Z d S)&raEFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). If convert_charrefs is True the character references are converted automatically to the corresponding Unicode character (and self.handle_data() is no longer split in chunks), otherwise they are passed by calling self.handle_entityref() or self.handle_charref() with the string containing respectively the named or numeric reference as the argument. )scriptstyle)textareatitleT)convert_charrefsc<||_|dS)zInitialize and reset this instance. If convert_charrefs is True (the default), all character references are automatically converted to the corresponding Unicode characters. N)r reset)selfr s 2/opt/alt/python311/lib64/python3.11/html/parser.py__init__zHTMLParser.__init__ss !1 cd|_d|_t|_d|_d|_d|_tj |dS)z1Reset this instance. Loses all unprocessed data.z???NT) rawdatalasttaginteresting_normal interesting cdata_elem_support_cdata _escapable _markupbase ParserBaser rs rr zHTMLParser.reset|sK  -"$$T*****rcN|j|z|_|ddS)zFeed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). rN)rgoaheadrdatas rfeedzHTMLParser.feeds% |d*  Qrc0|ddS)zHandle any buffered data.N)rrs rclosezHTMLParser.closes QrNc|jS)z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textrs rget_starttag_textzHTMLParser.get_starttag_texts ##rF escapablec@||_||_|rB|js;t jd|jztjtjz|_dSt jd|jztjtjz|_dS)Nz&|])z])) lowerrrr recompile IGNORECASEASCIIr)relemr*s rset_cdata_modezHTMLParser.set_cdata_modes**,,#  BT2 B!z*Dt*V*,-*@ B BD    "z*BT_*T*,-*@ B BD   rc:t|_d|_d|_dS)NT)rrrrrs rclear_cdata_modezHTMLParser.clear_cdata_modes-rc||_dS)aEnable or disable support of the CDATA sections. If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>". If disabled, "<[CDATA[" starts a bogus comments which ends with ">". This method is not called by default. Its purpose is to be called in custom handle_starttag() and handle_endtag() methods, with value that depends on the adjusted current node. See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state for details. N)r)rflags r_set_support_cdatazHTMLParser._set_support_cdatas#rc |j}d}t|}||krI|jr}|jsv|d|}|dkrY|dt ||dz }|dkr*tjd ||sn|}n=|j ||}|r| }n |jrn|}||krV|jr2|j r+| t|||n| ||||||}||krn|j}|d|rt"||r||} n|d|r||} n|d|r||} nl|d|r||} nJ|d |r||} n(|d z|ks|r| d|d z} nn| dkr|snt"||rn|d|r_|d z|kr| dnt0||rnd|||d zdnB|d|rU|}d D]/} || |d zr|t| z}n0|||d z|n|d|r(|jr!|||dzdn|||dzdkr!|||d zdni|d |r!|||d zdn<|d|r!|||d zdntAd|} ||| }n#|d|rtB||}|rq|"d d} |#| |$} |d| d z s| d z } ||| }d||dvr9| |||d z|||d z}nI|d|r5tJ||}|rj|"d } |&| |$} |d| d z s| d z } ||| }tN||}|rX|rU|"||dkr5|$} | |kr|} |||d z}n@|d z|kr/| d|||d z}nn||kI|rr||krl|jr2|j r+| t|||n| ||||||}||d|_dS)Nr<&"z [\t\n\r\f ;]z>wOO$A(//;; AAA1uu(3T_3$$Xgacl%;%;<<<<$$WQqS\222q!$$AAvvu +Jz#q!!\ 6%%gq11++A..AAZa(( ))!,,AAZ** **1--AAZa(( a((AAZa((33A66AA!eq[[C[$$S)))AAAq55#))'155H#D!,,Hq5A:: ,,T2222'--gq99? !//! >>>>#FA..H&8&&F&//!<<& !S[[ 0 %&++GAaCEN;;;;#K33 H8K H))'!A#$$-8888 1Q3--//;>>((17777#D!,,H++GAaCDDM::::#D!,,Hwqstt}5555,-FGGGANN1a((D!$$+ 6 gq11  ;;==2.D''--- A%:c1Q3//"Eq!,,Agabbk))((1Q3888 NN1ac22C## 6!33 ;;q>>D))$/// A%:c1Q3//"Eq!,,A"((!445u{{}} ;;!IIKK66 !A NN1a!e44!eq[[$$S)))q!a%00AAs!eez  %1q55$ / /  '!A#,!7!78888  1...q!$$Aqrr{ rc(|j}|||dzdkr||S|||dzdkrM|jrF|d|dz}|dkrdS|||dz||dzS|||dzd krF|d |dz}|dkrdS|||d z||d zS|||dzd kry|d |dz}|dkrdS||d z dkr$|||dz|d z n |||d z||d zS||S)Nr?r<rBr@z]]>rrDrArCrr=r$zV # #%%a(( ( QqsU^{ * *t/B * UAaC((A1uur   gac1fo . . .q5L QqsU^ ! ! # #{ 2 2LLac**E{{r   WQqSY/ 0 0 07N QqsU^u $ $ S!A#&&A1uurqs|s""!!'!A#qs("34444##GAaCFO444q5L++A.. .rc(|j}t||dz}|s"t||dz}|sdS|r4|}|||dz||S)Nr?rD)r commentcloserJcommentabruptcloserPrKrWr`)rrdreportrrPrfs rrSzHTMLParser.parse_commentfs,##GQqS11 &,,Wac::E r  1 A   !Q 0 0 0yy{{rr$c|j}|d|dz}|dkrdS|r |||dz||dzS)Nrr=rDr$)rrGrW)rrdrrrposs rrmzHTMLParser.parse_bogus_commentusb,ll3!$$ "992  2   !C 0 1 1 1Qwrc|j}t||dz}|sdS|}|||dz||}|S)Nr=rD)rpicloserJrKr[r`rrdrrPrfs rrTzHTMLParser.parse_pisj,w!,, 2 KKMM wqsAv''' IIKKrcd|_||}|dkr|S|j}||||_g}t||dz}|}|dx|_}||krt||}|sn|ddd\} } } | sd} nI| dddcxkr| ddks"n| dddcxkr| ddkr nn | dd} | rt| } | | | f|}||k||| } | dvr| ||||S| d r|||nU|||||jvr||n ||jvr||d |S) Nrr$r=rA'rD")r/>r{Tr))r'check_for_whole_start_tagrtagfind_tolerantrPr`r^r,rattrfind_tolerantrappendstriprLrXhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr2RCDATA_CONTENT_ELEMENTS) rrdendposrattrsrPrhtagmattrnamerest attrvaluer`s rrQzHTMLParser.parse_starttags $//22 A::M,&qx0 &&w!44 IIKK"[[^^11333 s&jj!''33A ()1a(8(8 %HdI , 2A2$8888)BCC.88882A2#777723377777%adO  0$Y// LL(..**I6 7 7 7A&jjah%%'' k ! !   WQvX. / / /M <<   9  # #C / / / /  e , , ,d111##C((((444##C4#888 rc|j}t||dz}|}||dz dkrdS|S)Nr$rrD)r locatetagendrPr`rws rr|z$HTMLParser.check_for_whole_start_tagsL,""7AaC00 IIKK 1Q3<3  2rcF|j}|d|dzdkrdSt||s.||dz|dzdkr|dzS||St ||dz}|}||dz dkrdSt||dz}|d }| || |S)Nrr=rrDrAr$) rrGrVrPrmrr`r}r^r, handle_endtagr4)rrdrrPrfrs rrRzHTMLParser.parse_endtags, <<QqS ! !A % %2++ 3qs1Q3w3&&s //222""7AaC00 IIKK 1Q3<3  2!&&w!44kk!nn""$$ 3 rc\|||||dSN)rrrrrs rrzHTMLParser.handle_startendtags2 S%((( 3rcdSrrs rrzHTMLParser.handle_starttag rcdSrr)rrs rrzHTMLParser.handle_endtagrrcdSrrrrjs rr_zHTMLParser.handle_charrefrrcdSrrrs rrbzHTMLParser.handle_entityrefrrcdSrrr s rrLzHTMLParser.handle_datarrcdSrrr s rrWzHTMLParser.handle_commentrrcdSrr)rdecls rrZzHTMLParser.handle_declrrcdSrrr s rr[zHTMLParser.handle_pirrcdSrrr s rrYzHTMLParser.unknown_decl rr)T)r$)!__name__ __module__ __qualname____doc__rrrr r"r%r'r(r2r4r7rrUrSrmrTrQr|rRrrrr_rbrLrWrZr[rYrrrrrZs*13+/+++O$$$16BBBBB # # # # G#G#G#X///D           ,,,`<                                r)rr-rhtmlr__all__r.rrcrar]rOrVrvrprqr}VERBOSEr~rlocatestarttagend_tolerant endendtag endtagfindrrrrrrs""  . RZ'' RZ % % BJ> ? ? "*@ A Arz+&& RZ % % "*S//rz(## RZ''2:QRRBJ Z  rz Z   (RZ)Z BJsOO RZ> ? ? p p p p p 'p p p p p r