g.ddlmZddlZddlZddlZddlmZddlmZddl m Z ddl m Z ddl mZddlmZd d lmZmZmZmZmZmZe e d*d Ze e d+d Ze e d,dZe e d*dZe e d*dZe e d*dZe e d*dZe e d*dZe e d*dZ e e d*dZ!e e d*dZ"e e d*dZ#e e d*dZ$e e d*dZ%e e d*dZ&e e d*dZ'e e(e d-dZ)e e d*dZ*d.d/dZ+e d d0d Z,d1d!Z-d2d"Z.d3d4d#Z/d5d$Z0d6d%Z1d&ejdd'f d7d(Z3 d8 d9d)Z4y):) annotationsN)IncrementalDecoder)aliases) lru_cache)findall) Generator)MultibyteIncrementalDecoder)ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsizec tj|}d|vxs(d|vxs"d|vxsd|vxsd|vxsd|vxs d|vxsd |vS#t$rYywxYw) NFz WITH GRAVEz WITH ACUTEz WITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz WITH TILDEz WITH MACRONzWITH RING ABOVE unicodedataname ValueError character descriptions I/opt/hc_python/lib64/python3.12/site-packages/charset_normalizer/utils.pyis_accentuatedrs&++I6   # , ; & , [ ( , { * ,  +  , ; &  , K '  ,  +  sA AActj|}|s|S|jd}tt |ddS)N r)r decompositionsplitchrint)r decomposedcodess r remove_accentr%,sA!// :J !'',E s58R !!cbt|}tjD] \}}||vs |cSy)zK Retrieve the Unicode range official name from a single character. N)ordritems)r character_ord range_name ord_ranges r unicode_ranger-7s9 YM!8!>!>!@ I I % "A r&cT tj|}d|vS#t$rYywxYw)NFLATINrrs ris_latinr0Es8&++I6  k !!   ''cZtj|}d|vryt|}|yd|vS)NPTF Punctuationrcategoryr-rcharacter_categorycharacter_ranges ris_punctuationr:Ns=)229=   "/ ":O O ++r&cptj|}d|vsd|vryt|}|yd|vxr|dk7S)NSNTFFormsLor5r7s r is_symbolr@]sP)229=   C+=$="/ ":O o % D*<*DDr&c2t|}|yd|vxsd|vS)NF Emoticons Pictographs)r-)rr9s r is_emoticonrDls*"/ ":O / ) M]o-MMr&cj|js|dvrytj|}d|vxs|dvS)N>|+<>TZ>PcPdPo)isspacerr6)rr8s r is_separatorrOvsBi+AA)229= $ $ P(:>P(PPr&cD|j|jk7SN)islowerisupperrs ris_case_variablerUs    )"3"3"5 55r&cT tj|}d|vS#t$rYywxYw)NFCJKrrcharacter_names ris_cjkrZs8$)))4 N "" r1cT tj|}d|vS#t$rYywxYw)NFHIRAGANArrXs r is_hiraganar]8$)))4  '' r1cT tj|}d|vS#t$rYywxYw)NFKATAKANArrXs r is_katakanarar^r1cT tj|}d|vS#t$rYywxYw)NFHANGULrrXs r is_hangulrd8$)))4 ~ %% r1cT tj|}d|vS#t$rYywxYw)NFTHAIrrXs ris_thairhs8$)))4 ^ ## r1cT tj|}d|vS#t$rYywxYw)NFARABICrrXs r is_arabicrkrer1c` tj|}d|vxrd|vS#t$rYywxYw)NFrjz ISOLATED FORMrrXs ris_arabic_isolated_formrmsB$)))4 ~ % K/^*KK s ! --c4tfdtDS)Nc3&K|]}|v ywrQ).0keywordr+s r z-is_unicode_range_secondary..sT4Sw*$4Ss)anyr)r+s`ris_unicode_range_secondaryrus T4ST TTr&cj|jduxr |jduxr |dk7xr|dk7S)NFu)rN isprintablerTs ris_unprintablerysL u$ "  ! ! #u , "   "  ! r&c rt|tstt|}t t |dt ||jdd}t|dk(ry|D]T}|jjdd}tjD]\}}||k(r|ccS||k(s|ccSVy)zW Extract using ASCII-only decoder any specified encoding in the first n-bytes. Nasciiignoreerrorsr-_) isinstancebytes TypeErrorlenrr mindecodelowerreplacerr))sequence search_zoneseq_lenresultsspecified_encodingencoding_alias encoding_ianas rany_specified_encodingrs h &x=G ',3w ,-44WX4NG  7|q%/557??SI .5]]_ )NM!33$$ 22$$ .= & r&cn|dvxs0ttjd|jtS)zQ Verify is a specific encoding is a multi byte one based on it IANA name > utf_7utf_8utf_16utf_32 utf_16_be utf_16_le utf_32_be utf_32_le utf_8_sig encodings.) issubclass importlib import_modulerr )rs ris_multi_byte_encodingrsC     *TF 34GG#  r&ctD]>}t|}t|tr|g}|D]}|j|s||fccS@y)z9 Identify and extract SIG/BOM in given sequence. )Nr&)r rr startswith)r iana_encodingmarksmarks ridentify_sig_or_bomrsQ ( %3M%B eU #GED""4($d** ( r&c |dvS)N>rrrp)rs rshould_strip_sig_or_bomr"s  4 44r&c|jjdd}tjD]\}}|||fvs |cS|rt d|d|S)zIReturns the Python normalized encoding name (Not the IANA official name).rrzUnable to retrieve IANA for '')rrrr)r)cp_namestrictrrs r iana_namer&scmmo%%c3/G *1% ~}5 5 *98 CDD Nr&ctt|s t|rytjd|j}tjd|j}|d}|d}d}t dD]7}t |g}|j ||j |k(s3|dz }9|dz S) Ngrr|r}rr )rrrrrangerr) iana_name_a iana_name_b decoder_a decoder_bid_aid_bcharacter_match_counti to_be_decodeds r cp_similarityr7sk*.D[.Q''*[M(BCVVI''*[M(BCVVI(9D(9D!" 3Z$aSz ;;} %])C C !Q & ! !3 &&r&c,|tvxr |t|vS)z Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using the function cp_similarity. )r )rrs r is_cp_similarrKs% -- ? 1+> >r&charset_normalizerz)%(asctime)s | %(levelname)s | %(message)sctj|}|j|tj}|j tj ||j |yrQ)logging getLoggersetLevel StreamHandler setFormatter Formatter addHandler)rlevel format_stringloggerhandlers rset_logging_handlerrVsU   t $F OOE##%G **=9: gr&c #K|r|dur|D]} || | |z} | sy| y|D]} | |z} | t|dzkDr|| | |z} |r |dur|| z} | j||rdnd} |r[| dkDrVt|d} |rH| d| |vrAt| | dz d D].}||| } |r |dur|| z} | j|d} | d| |vs.n| yw) NFr|rr}rr)rrrr) sequencesroffsets chunk_sizebom_or_sig_availablestrip_sig_or_bom sig_payloadis_multi_byte_decoderdecoded_payloadrchunk chunk_end cut_sequencechunk_partial_size_chkjs rcut_sequence_chunksrcs<0E9A#AJ7EK  AJI3y>A--$QZ8L#(8E(A*\9  ''#8xh(E%Q.1*b.A&$556oM"1a!eR0'09'= /4D4M+6+EL , 3 3M( 3 S !8"89_L!1KGs B>C  C )rstrreturnbool)rrrr)rrr str | None)r+rrr)i )rrrr"rr)rrrr)rrrztuple[str | None, bytes])rrrr)T)rrrrrr)rrrrrfloat)rrrrrr)rrrr"rrrNonerQ)rrrrrrrr"rrrrrrrrrrrzGenerator[str, None, None])5 __future__rrrrcodecsrencodings.aliasesr functoolsrrertypingr_multibytecodecr constantr r r rrrrr%r-r0r:r@rDrOrUrZr]rardrhrkrmrruryrrrrrrrINFOrrrpr&rrs"%% *+," *+"," *+ ,  *+"," *+ ,, , *+ E, E *+N,N *+Q,Q *+6,6 *+#,# *+(,( *+(,( *+&,& *+$,$ *+&,& *+L,L 3./0U1U *+,@ 3($5"'(%D       ,#'5555 5  5  55 5 5 5r&