
    2)i2                        d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZ ddlmZmZ ddlmZmZmZ  G d	 d
      Z G d d      Zeeef   Zee   Z G d d      Zy)    )annotations)aliases)dumps)sub)AnyIteratorListTuple   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                     e Zd Z	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZddZedd       ZddZddZ	ddZ
edd	       Zed d
       Zed!d       Zed!d       Zed d       Zedd       Zedd       Zedd       Zedd       Zedd       Zed"d       Zed#d       Zed!d       Zed d       Zed d       Zd$d%dZed&d       Zy)'CharsetMatchNc                    || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        || _        y )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesdecoded_payloadpreemptive_declarations           ]/var/www/html/spbu.com/backend/venv/lib/python3.12/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   s_     ,3.'6,5%315+-,/"-1,0#23I$    c                    t        |t              s)t        |t              rt        |      | j                  k(  S y| j                  |j                  k(  xr | j
                  |j
                  k(  S )NF)
isinstancer   strr   encodingfingerprintr!   others     r)   __eq__zCharsetMatch.__eq__)   sV    %.%% '4==88}}.X43C3CuGXGX3XXr+   c                   t        |t              st        t        | j                  |j                  z
        }t        | j
                  |j
                  z
        }|dk  r|dkD  r| j
                  |j
                  kD  S |dk  rS|dk  rNt        | j                        t        k\  r| j                  |j                  k  S | j                  |j                  kD  S | j                  |j                  k  S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gzt?g{Gz?)
r-   r   
ValueErrorabschaos	coherencelenr   r   multi_byte_usage)r!   r2   chaos_differencecoherence_differences       r)   __lt__zCharsetMatch.__lt__0   s     %."%djj5;;&>"?&)$..5??*J&K e#(<t(C>>EOO33%*>$*F 4==!%55zzEKK//((5+A+AAAzzEKK''r+   c                \    dt        t        |             t        | j                        z  z
  S )Ng      ?)r9   r.   rawr!   s    r)   r:   zCharsetMatch.multi_byte_usageF   s"    c#d)ns488}455r+   c                   | j                   st        | j                  | j                  d      | _         | j                  rA| j                  dk(  r2| j                   r&| j                   d   dk(  r| j                   dd  | _         | j                   S )Nstrictutf_7r   u   ﻿r   )r   r.   r   r   r   r@   s    r)   __str__zCharsetMatch.__str__J   sn    <<t}}dnnhGDL
 $$NNg-LLLLOx/#||AB/||r+   c                <    d| j                    d| j                   dS )Nz<CharsetMatch 'z' fp(z)>)r/   r0   r@   s    r)   __repr__zCharsetMatch.__repr__Z   s"     uT5E5E4FbIIr+   c                    t        |t              r|| k(  r$t        dj                  |j                              d |_        | j                  j                  |       y )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r-   r   r5   format	__class__r   r   appendr1   s     r)   add_submatchzCharsetMatch.add_submatch]   sO    %.%4-MTTOO  E"r+   c                    | j                   S N)r   r@   s    r)   r/   zCharsetMatch.encodingh   s    ~~r+   c                    g }t        j                         D ]G  \  }}| j                  |k(  r|j                  |       '| j                  |k(  s7|j                  |       I |S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr/   rJ   )r!   also_known_asups       r)   encoding_aliaseszCharsetMatch.encoding_aliasesl   sY    
 $&MMODAq}}!$$Q'!#$$Q'	 $
 r+   c                    | j                   S rM   r   r@   s    r)   bomzCharsetMatch.bomy       ###r+   c                    | j                   S rM   rU   r@   s    r)   byte_order_markzCharsetMatch.byte_order_mark}   rW   r+   c                F    | j                   D cg c]  }|d   	 c}S c c}w )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        r   r   )r!   es     r)   r&   zCharsetMatch.languages   s$     #oo.o!o...s   c                   | j                   shd| j                  v ryddlm}m} t        | j                        r || j                        n || j                        }t        |      dk(  sd|v ry|d   S | j                   d   d   S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r   could_be_from_charsetcharset_normalizer.cdr`   ra   r   r/   r9   )r!   r`   ra   r&   s       r)   languagezCharsetMatch.language   s      $444  X *$--8 &dmm4'6  9~"my&@ Q<q!!$$r+   c                    | j                   S rM   )r   r@   s    r)   r7   zCharsetMatch.chaos   s    $$$r+   c                @    | j                   sy| j                   d   d   S )Nr   r   r   r[   r@   s    r)   r8   zCharsetMatch.coherence   s     q!!$$r+   c                6    t        | j                  dz  d      S Nd      )ndigits)roundr7   r@   s    r)   percent_chaoszCharsetMatch.percent_chaos   s    TZZ#%q11r+   c                6    t        | j                  dz  d      S ri   )rm   r8   r@   s    r)   percent_coherencezCharsetMatch.percent_coherence   s    T^^c)155r+   c                    | j                   S )z+
        Original untouched bytes.
        )r   r@   s    r)   r?   zCharsetMatch.raw   s    
 }}r+   c                    | j                   S rM   )r   r@   s    r)   submatchzCharsetMatch.submatch   s    ||r+   c                2    t        | j                        dkD  S Nr   )r9   r   r@   s    r)   has_submatchzCharsetMatch.has_submatch   s    4<< 1$$r+   c                    | j                   | j                   S t        |       D cg c]  }t        |       }}t        t	        |D ch c]  }|s|	 c}            | _         | j                   S c c}w c c}w rM   )r   r.   r   sortedlist)r!   chardetected_rangesrs       r)   	alphabetszCharsetMatch.alphabets   sp    +'''MPQUY,WYT]4-@Y,W%d+L!!A+L&MN### -X+Ls   A0A5A5c                p    | j                   g| j                  D cg c]  }|j                   c}z   S c c}w )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        )r   r   r/   )r!   ms     r)   rc   z"CharsetMatch.could_be_from_charset   s0     t||"D|!1::|"DDD"Ds   3c                6     j                    j                   |k7  rr| _         t               } j                  = j                  j                         dvr!t	        t
         fd|dd d      }||dd z   }|j                  |d       _         j                  S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        N)zutf-8utf8utf_8c                    | j                   | j                         d   | j                         d    j                  | j                         d   t	        j
                        j                  dd            S )Nr   r   _-)stringspanreplacegroupsr   r   )r   r!   s    r)   <lambda>z%CharsetMatch.output.<locals>.<lambda>   sW    ahhqvvx{QVVXa[AII
1!$"7"78@@cJr+   i    r   )countr   )r   r.   r    lowerr   r   encoder   )r!   r/   decoded_stringpatched_headers   `   r)   outputzCharsetMatch.output   s    
   (D,A,AX,M$,D! YN,,80066812 "%3 #5D)" "0.2G!G#1#8#89#MD ###r+   c                *    t        t        |             S )z]
        Retrieve a hash fingerprint of the decoded payload, used for deduplication.
        )hashr.   r@   s    r)   r0   zCharsetMatch.fingerprint   s    
 CIr+   )NN)r"   bytes | bytearrayr#   r.   r$   floatr%   boolr&   CoherenceMatchesr'   
str | Noner(   r   )r2   objectreturnr   )r   r   r   r.   )r2   r   r   None)r   	list[str]r   r   )r   r   )r   zlist[CharsetMatch])r   )r/   r.   r   bytesr   int)__name__
__module____qualname__r*   r3   r=   propertyr:   rD   rF   rK   r/   rS   rV   rY   r&   re   r7   r8   rn   rp   r?   rs   rv   r}   rc   r   r0    r+   r)   r   r      s    '+-1J"J J 	J
 J $J $J !+J8Y(, 6 6 J	#   
 
 $ $ $ $ / / % %6 % % % %
 2 2 6 6     % % $ $ E E$:  r+   r   c                  R    e Zd ZdZdddZddZddZddZddZddZ	dd	Z
dd
Zy)CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nc                8    |rt        |      | _        y g | _        y rM   )rx   _results)r!   resultss     r)   r*   zCharsetMatches.__init__  s    ?FF7OBr+   c              #  8   K   | j                   E d {    y 7 wrM   r   r@   s    r)   __iter__zCharsetMatches.__iter__  s     ==  s   c                    t        |t              r| j                  |   S t        |t              r/t	        |d      }| j                  D ]  }||j
                  v s|c S  t        )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r-   r   r   r.   r   rc   KeyError)r!   itemresults      r)   __getitem__zCharsetMatches.__getitem__
  s[    
 dC ==&&dC T5)D--6777!M ( r+   c                ,    t        | j                        S rM   r9   r   r@   s    r)   __len__zCharsetMatches.__len__  s    4==!!r+   c                2    t        | j                        dkD  S ru   r   r@   s    r)   __bool__zCharsetMatches.__bool__  s    4==!A%%r+   c                   t        |t              s-t        dj                  t	        |j
                                    t        |j                        t        k  rW| j                  D ]H  }|j                  |j                  k(  s|j                  |j                  k(  s7|j                  |        y | j                  j                  |       t        | j                        | _	        y)z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r-   r   r5   rH   r.   rI   r9   r?   r   r   r0   r7   rK   rJ   rx   )r!   r   matchs      r)   rJ   zCharsetMatches.append  s    
 $-?FF'  txx=++$$(8(88U[[DJJ=V&&t, ' 	T"t}}-r+   c                :    | j                   sy| j                   d   S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   r@   s    r)   bestzCharsetMatches.best2  s     }}}}Qr+   c                "    | j                         S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   r@   s    r)   firstzCharsetMatches.first:  s     yy{r+   rM   )r   zlist[CharsetMatch] | None)r   zIterator[CharsetMatch])r   z	int | strr   r   r   r   )r   r   r   r   )r   zCharsetMatch | None)r   r   r   __doc__r*   r   r   r   r   rJ   r   r   r   r+   r)   r   r      s0    
O!"&.( r+   r   c                  Z    e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZedd       ZddZy)CliDetectionResultc                    || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        y rM   )pathunicode_pathr/   rS   alternative_encodingsre   r}   r%   r7   r8   is_preferred)r!   r   r/   rS   r   re   r}   r%   r7   r8   r   r   s               r)   r*   zCliDetectionResult.__init__F  sV     	(4$,+;0E"%$-$2!
 )".r+   c                    | j                   | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  dS )Nr   r/   rS   r   re   r}   r%   r7   r8   r   r   r   r@   s    r)   __dict__zCliDetectionResult.__dict__`  se     II $ 5 5%)%?%?"11ZZ -- --
 	
r+   c                2    t        | j                  dd      S )NT   )ensure_asciiindent)r   r   r@   s    r)   to_jsonzCliDetectionResult.to_jsonp  s    T]]a@@r+   N)r   r.   r/   r   rS   r   r   r   re   r.   r}   r   r%   r   r7   r   r8   r   r   r   r   r   )r   zdict[str, Any]r   )r   r   r   r*   r   r   r   r   r+   r)   r   r   E  s    // / $	/
  )/ / / / / / !/ /4 
 
Ar+   r   N)
__future__r   encodings.aliasesr   jsonr   rer   typingr   r   r	   r
   constantr   r   utilsr   r   r   r   r   r.   r   CoherenceMatchr   r   r   r+   r)   <module>r      sa    " %   - - G C Co od@ @F sEz"' ,A ,Ar+   