o �J�h�-�@s�zddlZWn eyddlZYnwddlZddlZddlmZddlmZddl m Z ddl m Z ddl mZmZmZmZmZmZddlmZdd lmZmZmZmZmZmZe ed �d ed efd d��Ze ed �d ed efdd��Z e ed �d ed eefdd��Z!e ed �d ed efdd��Z"e ed �d ed efdd��Z#e ed �d ed efdd��Z$e ed �d ed efdd��Z%e ed �d ed efdd��Z&e ed �d ed efdd��Z'e ed �d ed efdd ��Z(d ed efd!d"�Z)e ed �d ed efd#d$��Z*e ed �d ed efd%d&��Z+e ed �d ed efd'd(��Z,e ed �d ed efd)d*��Z-e ed �d ed efd+d,��Z.e e/e�d �d-ed efd.d/��Z0e ed �d ed efd0d1��Z1d_d3e2d4e3d eefd5d6�Z4e d7d �d8ed efd9d:��Z5d3e2d eeee2ffd;d<�Z6d=ed efd>d?�Z7d`dAedBed efdCdD�Z8dEed eefdFdG�Z9dHedIed e:fdJdK�Z;dHedIed efdLdM�Z<dNej=dOfd8edPe3dQed dfdRdS�Z> dadTe2dUedVe?dWe3dXedYedZe2d[ed\eed eeddffd]d^�Z@dS)b�N)�IncrementalDecoder)�aliases)� lru_cache)�findall)� Generator�List�Optional�Set�Tuple�Union)�MultibyteIncrementalDecoder�)�ENCODING_MARKS�IANA_SUPPORTED_SIMILAR�RE_POSSIBLE_ENCODING_INDICATION�UNICODE_RANGES_COMBINED�UNICODE_SECONDARY_RANGE_KEYWORD�UTF8_MAXIMAL_ALLOCATION)�maxsize� character�returncCsTzt�|�}Wn tyYdSwd|vp)d|vp)d|vp)d|vp)d|vp)d|vS)NFz WITH GRAVEz WITH ACUTEz WITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz WITH TILDE�� unicodedata�name� ValueError�r� description�r�VC:\pinokio\api\whisper-webui.git\app\env\lib\site-packages\charset_normalizer\utils.py�is_accentuateds  ������rcCs.t�|�}|s |S|�d�}tt|dd��S)N� r�)r� decomposition�split�chr�int)rZ decomposed�codesrrr� remove_accent,s  r'cCs.t|�}t��D] \}}||vr|SqdS)zK Retrieve the Unicode range official name from a single character. N)�ordr�items)rZ character_ord� range_nameZ ord_rangerrr� unicode_range7s �r+cC�*z t�|�}Wd|vStyYdSw)NFZLATINrrrrr�is_latinEs   ��r-cCs&z|�d�WdStyYdSw)N�asciiFT)�encode�UnicodeEncodeError�rrrr�is_asciiNs   ��r2cCs2t�|�}d|vr dSt|�}|durdSd|vS)N�PTF� Punctuation�r�categoryr+�r�character_category�character_rangerrr�is_punctuationWs r:cCs:t�|�}d|vs d|vrdSt|�}|durdSd|vS)N�S�NTF�Formsr5r7rrr� is_symbolfs r>cCst|�}|dur dSd|vS)NF� Emoticons)r+)rr9rrr� is_emoticonusr@cCs&|��s|dvr dSt�|�}d|vS)N>u|�<�>�+�,�;T�Z)�isspacerr6�rr8rrr� is_separators rIcCs|��|��kS�N)�islower�isupperr1rrr�is_case_variable�srMcCst�|�}|dkS)NZCo)rr6rHrrr�is_private_use_only�s rNcCr,)NF�CJKr�rZcharacter_namerrr�is_cjk��   ��rQcCr,)NFZHIRAGANArrPrrr� is_hiragana�rRrScCr,)NFZKATAKANArrPrrr� is_katakana�rRrTcCr,)NFZHANGULrrPrrr� is_hangul�rRrUcCr,)NFZTHAIrrPrrr�is_thai�rRrVr*cst�fdd�tD��S)Nc3s�|]}|�vVqdSrJr)�.0�keyword�r*rr� <genexpr>�s�z-is_unicode_range_secondary.<locals>.<genexpr>)�anyrrYrrYr�is_unicode_range_secondary�sr\cCs(|��duo|��duo|dko|dkS)NF�u)rG� isprintabler1rrr�is_unprintable�s  ���r_��sequence� search_zonecCs�t|t�st�t|�}tt|dt||��jddd��}t|�dkr$dS|D]'}|��� dd�}t � �D]\}}||krB|S||krL|Sq4q&dS)zW Extract using ASCII-only decoder any specified encoding in the first n-bytes. Nr.�ignore��errorsr�-�_) � isinstance�bytes� TypeError�lenrr�min�decode�lower�replacerr))rarbZseq_len�results�specified_encoding�encoding_alias� encoding_ianarrr�any_specified_encoding�s& �   ��rt�rcCs |dvptt�d�|��jt�S)zQ Verify is a specific encoding is a multi byte one based on it IANA name > � utf_16_be�utf_7�utf_32� utf_32_le�utf_16� utf_16_le� utf_32_be� utf_8_sig�utf_8� encodings.{})� issubclass� importlib� import_module�formatrr )rrrr�is_multi_byte_encoding�s  ��r�cCsJtD] }t|}t|t�r|g}|D]}|�|�r!||fSqqdS)z9 Identify and extract SIG/BOM in given sequence. )N�)rrhri� startswith)ra� iana_encodingZmarksZmarkrrr�identify_sig_or_bom s  ��r�r�cCs|dvS)N>rzrxr)r�rrr�should_strip_sig_or_bomsr�T�cp_name�strictcCsL|���dd�}t��D]\}}|||fvr|Sq |r$td�|���|S)Nrfrgz Unable to retrieve IANA for '{}')rnrorr)rr�)r�r�rrrsrrr� iana_name!s �r��decoded_sequencecCs4t�}|D]}t|�}|durq|�|�qt|�SrJ)�setr+�add�list)r��rangesrr9rrr� range_scan1s r�� iana_name_a� iana_name_bc Cs�t|�st|�r dSt�d�|��j}t�d�|��j}|dd�}|dd�}d}td�D]}t|g�}|�|�|�|�krA|d7}q,|dS) Ngrrcrdr��r ��)r�r�r�r�r�rangerirm) r�r�Z decoder_aZ decoder_bZid_aZid_b�character_match_count�iZ to_be_decodedrrr� cp_similarity?s*����    �r�cCs|tvo |t|vS)z� Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using the function cp_similarity. )r)r�r�rrr� is_cp_similarXs �r��charset_normalizerz)%(asctime)s | %(levelname)s | %(message)s�level� format_stringcCs:t�|�}|�|�t��}|�t�|��|�|�dSrJ)�logging� getLogger�setLevel� StreamHandler� setFormatter� Formatter� addHandler)rr�r��logger�handlerrrr�set_logging_handlercs  r�� sequencesrs�offsets� chunk_size�bom_or_sig_available�strip_sig_or_bom� sig_payload�is_multi_byte_decoder�decoded_payloadc cs2�|r|dur|D]} || | |�} | sdS| Vq dS|D]v} | |} | t|�dkr/q || | |�} |rA|durA|| } | j||rHdndd�} |r�| dkr�|| dkr�t|d�} |r�| d| �|vr�t| | d d �D]#}||| �} |r�|dur�|| } | j|dd�} | d| �|vr�nqo| Vq dS) NF�rcr�rdrrur!������)rkrmrlr�)r�rsr�r�r�r�r�r�r�r��chunkZ chunk_endZ cut_sequenceZchunk_partial_size_chk�jrrr�cut_sequence_chunksqsD� �  � �  ��r�)r`)TrJ)AZ unicodedata2r� ImportErrorr�r��codecsrZencodings.aliasesr� functoolsr�rer�typingrrrr r r Z_multibytecodecr �constantrrrrrr�str�boolrr'r+r-r2r:r>r@rIrMrNrQrSrTrUrVrkr\r_rir%rtr�r�r�r�r��floatr�r��INFOr�r�r�rrrr�<module>s�   �                    ���� ���������� � �
Memory