
    EqgYJ                         d Z ddlZddlZddlmZmZmZ  G d de          Zd Z	 G d d	e          Z
 G d
 de          ZddZedk    rddlZ ej                     dS dS )u  Unicode utility functions

>>> from .import unicode_util
>>> from .util import u
>>> u1 = '1'  # DIGIT ONE
>>> u2 = u('a')  # LATIN SMALL LETTER A
>>> u3 = u('２')  # FULLWIDTH DIGIT TWO
>>> u4 = u('Ā')  # LATIN CAPITAL LETTER A WITH MACRON
>>> unicode_util.Category.get(u1) == u('Nd')
True
>>> unicode_util.Category.get(u2) == u('Ll')
True
>>> unicode_util.Category.get(u3) == u('Nd')
True
>>> unicode_util.Category.get(u4) == u('Lu')
True
>>> unicode_util.Category.get(u2) == unicode_util.Category.LOWERCASE_LETTER
True
>>> try:
...     beyond_bmp = u('𐄀')  # AEGEAN WORD SEPARATOR LINE
... except Exception:
...     beyond_bmp = u('')
>>> if len(beyond_bmp) == 1:  # We have a UCS4 build of Python
...     cat_po = unicode_util.Category.get(beyond_bmp)
... else:  # UCS2 build of Python; no non-BMP chars available
...     cat_po = unicode_util.Category.OTHER_PUNCTUATION
>>> cat_po == u('Po')
True
>>> unicode_util.is_letter(u1)
False
>>> unicode_util.is_letter(u2)
True
>>> unicode_util.is_letter(u3)
False
>>> unicode_util.is_letter(u4)
True
>>> b1 = unicode_util.Block.get(u1)
>>> str(b1)
'Block[0000, 007f]'
>>> b1 == unicode_util.Block.BASIC_LATIN
True
>>> b1 == [0x0000, 0x0075]
False
>>> b2 = unicode_util.Block.get(u2)
>>> b2 == unicode_util.Block.BASIC_LATIN
True
>>> b3 = unicode_util.Block.get(u3)
>>> b3 != unicode_util.Block.BASIC_LATIN
True
>>> b3 == unicode_util.Block.HALFWIDTH_AND_FULLWIDTH_FORMS
True
>>> b4 = unicode_util.Block.get(u4)
>>> b4 == unicode_util.Block.LATIN_EXTENDED_A
True
>>> unicode_util.Block.get(u('ࡠ')) == unicode_util.Block.UNKNOWN
True
>>> try:
...     unknown_block = u('𓐰')
... except Exception:
...     unknown_block = u('')
>>> if len(unknown_block) == 1:  # We have a UCS4 build of Python
...     unicode_util.Block.get(u('𓐰')) == unicode_util.Block.UNKNOWN
... else:  # UCS2 build of Python; no unknown characters available
...     True
True
>>> unicode_util.digit(u1)
1
>>> unicode_util.digit(u2, -1)
-1
>>> unicode_util.digit(u3, -1)
2
>>> str(hash(b3))  # doctest: +ELLIPSIS
'...'
    N   )UnicodeMixinunicoduc                   V   e Zd ZdZ ed          Z ed          Z ed          Z ed          Z ed          Z	 ed          Z
 ed          Z ed	          Z ed
          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z ed          Z  ed          Z! ed          Z" ed           Z# ed!          Z$ ed"          Z% ed#          Z& ed$          Z' ed%          Z( ed&          Z)e*d'             Z+d(S ))CategoryzaGeneral category of a Unicode character.

    See http://www.unicode.org/reports/tr18/#CategoriesLLuLlLtLmLoMMnMcMeNNdNlNoSSmScSkSoPPcPdPsPePiPfPoZZsZlZpCCcCfCsCoCnc                 b    t          |          }t          t          j        |                    S )zTReturn the general category code (as Unicode string) for the given Unicode character)r   unicodedatacategory)clsuni_chars     K/var/www/html/env/lib/python3.11/site-packages/phonenumbers/unicode_util.pygetzCategory.get{   s*     (##k*844555    N),__name__
__module____qualname____doc__r   LETTERUPPERCASE_LETTERLOWERCASE_LETTERTITLECASE_LETTERMODIFIER_LETTEROTHER_LETTERMARKNON_SPACING_MARKSPACING_COMBINING_MARKENCLOSING_MARKNUMBERDECIMAL_DIGIT_NUMBERLETTER_NUMBEROTHER_NUMBERSYMBOLMATH_SYMBOLCURRENCY_SYMBOLMODIFIER_SYMBOLOTHER_SYMBOLPUNCTUATIONCONNECTOR_PUNCTUATIONDASH_PUNCTUATIONOPEN_PUNCTUATIONCLOSE_PUNCTUATIONINITIAL_PUNCTUATIONFINAL_PUNCTUATIONOTHER_PUNCTUATION	SEPARATORSPACE_SEPARATORLINE_SEPARATORPARAGRAPH_SEPARATOROTHERCONTROLFORMAT	SURROGATEPRIVATE_USENOT_ASSIGNEDclassmethodr4    r5   r3   r   r   Q   s(       ; ; QsVVFqwwqwwqwwaggO1T77L1S66DqwwQtWWQtWWNQsVVF1T77AdGGM1T77LQsVVF!D''KaggOaggO1T77L!C&&KAdGGqwwqww$!D''$$#IaggOQtWWN!D''AcFFEaggGQtWWF$I!D''K1T77L6 6 [6 6 6r5   r   c                     t                               |           }|t           j        k    p?|t           j        k    p/|t           j        k    p|t           j        k    p|t           j        k    S )zADetermine whether the given Unicode character is a Unicode letter)r   r4   r;   r<   r=   r>   r?   )r2   r0   s     r3   	is_letterrb      sg    ||H%%H11 .11.11. 00. --	/r5   c                   2    e Zd ZdZddZd Zd Zd Zd ZdS )	_BlockRangez?Describe the range of characters encompassed by a Unicode blockNc                 4    || _         || _        || ||<   d S d S N)startend)selfrg   rh   regdicts       r3   __init__z_BlockRange.__init__   s+    
!GENNN r5   c                 z    t          |t                    st          S | j        |j        k    o| j        |j        k    S rf   )
isinstancerd   NotImplementedrg   rh   ri   others     r3   __eq__z_BlockRange.__eq__   s7    %-- 	"!!
ek)Cdh%).CDr5   c                     | |k     S rf   r`   ro   s     r3   __ne__z_BlockRange.__ne__   s    5=  r5   c                 8    t          | j        | j        f          S rf   )hashrg   rh   ri   s    r3   __hash__z_BlockRange.__hash__   s    TZ*+++r5   c                 >    t          d          | j        | j        fz  S )NzBlock[%04x, %04x])r   rg   rh   rv   s    r3   __unicode__z_BlockRange.__unicode__   s    )**dj$(-CCCr5   rf   )	r6   r7   r8   r9   rk   rq   rs   rw   ry   r`   r5   r3   rd   rd      sr        II" " " "E E E
! ! !, , ,D D D D Dr5   rd   c                      e Zd ZdZi ZdZ edde          Z edde          Z edde          Z	 ed	d
e          Z
 edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edd e          Z ed!d"e          Z ed#d$e          Z ed%d&e          Z ed'd(e          Z ed)d*e          Z ed+d,e          Z ed-d.e          Z ed/d0e          Z ed1d2e          Z ed3d4e          Z ed5d6e          Z  ed7d8e          Z! ed9d:e          Z" ed;d<e          Z# ed=d>e          Z$ ed?d@e          Z% edAdBe          Z& edCdDe          Z' edEdFe          Z( edGdHe          Z) edIdJe          Z* edKdLe          Z+ edMdNe          Z, edOdPe          Z- edQdRe          Z. edSdTe          Z/ edUdVe          Z0 edWdXe          Z1 edYdZe          Z2 ed[d\e          Z3 ed]d^e          Z4 ed_d`e          Z5 edadbe          Z6 edcdde          Z7 ededfe          Z8 edgdhe          Z9 edidje          Z: edkdle          Z; edmdne          Z< edodpe          Z= edqdre          Z> edsdte          Z? edudve          Z@ edwdxe          ZA edydze          ZB ed{d|e          ZC ed}d~e          ZD edde          ZE edde          ZF edde          ZG edde          ZH edde          ZI edde          ZJ edde          ZK edde          ZL edde          ZM edde          ZN edde          ZO edde          ZP edde          ZQ edde          ZR edde          ZS edde          ZT edde          ZU edde          ZV edde          ZW edde          ZX edde          ZY edde          ZZ edde          Z[ edde          Z\ edde          Z] edde          Z^ edde          Z_ edde          Z` edde          Za edde          Zb edde          Zc edde          Zd edde          Ze edde          Zf edde          Zg edde          Zh edde          Zi edde          Zj edde          Zk edde          Zl edde          Zm edde          Zn edde          Zo edde          Zp edde          Zq edde          Zr edde          Zs edde          Zt edde          Zu edde          Zv edde          Zw edde          Zx edde          Zy edde          Zz edde          Z{ edde          Z| edde          Z} edde          Z~ edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edd e          Z edde          Z edde          Z edde          Z edde          Z ed	d
e          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edd e          Z ed!d"e          Z ed#d$e          Z ed%d&e          Z ed'd(e          Z ed)d*e          Z ed+d,e          Z ed-d.e          Z ed/d0e          Z ed1d2e          Z ed3d4e          Z ed5d6e          Z ed7d8e          Z ed9d:e          Z ed;d<e          Z ed=d>e          Z ed?d@e          Z edAdBe          Z edCdDe          Z edEdFe          Z edGdHe          Z edIdJe          Z edKdLe          Z edMdNe          Z edOdPe          Z edQdRe          Z edSdTe          Z edUdVe          Z edWdXe          Z edYdZe          Z ed[d\e          Z ed]d^e          Z ed_d`e          Z edadbe          Z edcdde          Z ededfe          Z edgdhe          Z edidje          Z edkdle          Z edmdne          Z edodpe          Z edqdre          Z edsdte          Z edudve          Z edwdxe          Z edydze          Z ed{d|e          Z ed}d~e          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edde          Z edd          Zeِd             ZdS (  Blockz*Description of the possible Unicode blocksNr               i  i  iO  iP  i  i  i  i   io  ip  i  i   i  i   i/  i0  i  i  i  i   i  i   iO  iP  i  i  i  i  i  i   i?  i@  i_  i 	  i	  i	  i	  i 
  i
  i
  i
  i   i  i  i  i   i  i  i  i   i  i  i  i   i  i  i  i   i  i   i  i  i  i   i  i   i  i  i  i  i  i   i  i  i  i  i  i   i  i   i?  i@  i_  i`  i  i  i  i   i  i  i  i   iO  iP  i  i  i  i  i  i   i  i   i  i   i  i  i  i  i  i   iO  iP  i  i  i  i   i  i  i  i  i  i   i  i   i  i    io   ip   i   i   i   i   i   i !  iO!  iP!  i!  i!  i!  i "  i"  i #  i#  i $  i?$  i@$  i_$  i`$  i$  i %  i%  i%  i%  i%  i%  i &  i&  i '  i'  i'  i'  i'  i'  i (  i(  i )  i)  i)  i)  i *  i*  i +  i+  i ,  i_,  i`,  i,  i,  i,  i -  i/-  i0-  i-  i-  i-  i-  i-  i .  i.  i.  i.  i /  i/  i/  i/  i 0  i?0  i@0  i0  i0  i0  i 1  i/1  i01  i1  i1  i1  i1  i1  i1  i1  i1  i1  i 2  i2  i 3  i3  i 4  iM  iM  iM  i N  i  i   i  i  iϤ  iФ  i  i   i?  i@  i  i  i  i   i  i   i  i   i/  i0  i?  i@  i  i  iߨ  i  i  i   i/  i0  i_  i`  i  i  iߩ  i   i_  i`  i  i  iߪ  i   i/  i  i  i   i  i  i  i   i  i  i  i   i  i   i  i   i  i   iO  iP  i  i   i  i  i  i   i/  i0  iO  iP  io  ip  i  i   i  i  i  i   i  i  i  i  i? i@ i i i i i i i i i i  i/ i0 iO i i i i i  iO iP i i i i  i? i@ i_ i 	 i	 i 	 i?	 i 
 i_
 i`
 i
 i  i? i@ i_ i` i i  iO i` i i  i i i i   i# i $ i$ i 0 i/4 i h i?j i  i i  i i  i i  iO i  i_ i` i i  i i  i/ i0 i i i i  i i  i i  i i  iO i i i  i i   iߦ i  i? i@ i i  i i   i  i  i i   i i   i c                 N   t          |          }t          |          }t          j        5t	          t          j                                                  t          _        t          j        t          j        |          }|dk    r|t          j        t          j        |dz
                    j	        k    rS|t          j        t          j        |dz
                    j
        k    r%t          j        t          j        |dz
                    S |t          t          j                  k     rx|t          j        t          j        |                  j	        k    rM|t          j        t          j        |                  j
        k    r"t          j        t          j        |                  S t          j        S )z7Return the Unicode block of the given Unicode characterNr   r   )r   ordr{   _RANGE_KEYSsorted_RANGESkeysbisectbisect_leftrg   rh   lenUNKNOWN)r1   r2   
code_pointidxs       r3   r4   z	Block.get}  s(    (##]]
$ &u}'9'9';'; < <E !2J??!GG%-(9#'(BCIII%-(9#'(BCGGG=!237!;<<C&&&&EM%*;C*@AGGGEM%*;C*@AEEE=!23!788= r5   )r6   r7   r8   r9   r   r   rd   BASIC_LATINLATIN_1_SUPPLEMENTLATIN_EXTENDED_ALATIN_EXTENDED_BIPA_EXTENSIONSSPACING_MODIFIER_LETTERSCOMBINING_DIACRITICAL_MARKSGREEK_AND_COPTICCYRILLICCYRILLIC_SUPPLEMENTARMENIANHEBREWARABICSYRIACARABIC_SUPPLEMENTTHAANANKO	SAMARITANMANDAIC
DEVANAGARIBENGALIGURMUKHIGUJARATIORIYATAMILTELUGUKANNADA	MALAYALAMSINHALATHAILAOTIBETANMYANMARGEORGIANHANGUL_JAMOETHIOPICETHIOPIC_SUPPLEMENTCHEROKEE%UNIFIED_CANADIAN_ABORIGINAL_SYLLABICSOGHAMRUNICTAGALOGHANUNOOBUHIDTAGBANWAKHMER	MONGOLIAN.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDEDLIMBUTAI_LENEW_TAI_LUEKHMER_SYMBOLSBUGINESETAI_THAMBALINESE	SUNDANESEBATAKLEPCHAOL_CHIKIVEDIC_EXTENSIONSPHONETIC_EXTENSIONSPHONETIC_EXTENSIONS_SUPPLEMENT&COMBINING_DIACRITICAL_MARKS_SUPPLEMENTLATIN_EXTENDED_ADDITIONALGREEK_EXTENDEDGENERAL_PUNCTUATIONSUPERSCRIPTS_AND_SUBSCRIPTSCURRENCY_SYMBOLS'COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLSLETTERLIKE_SYMBOLSNUMBER_FORMSARROWSMATHEMATICAL_OPERATORSMISCELLANEOUS_TECHNICALCONTROL_PICTURESOPTICAL_CHARACTER_RECOGNITIONENCLOSED_ALPHANUMERICSBOX_DRAWINGBLOCK_ELEMENTSGEOMETRIC_SHAPESMISCELLANEOUS_SYMBOLSDINGBATS$MISCELLANEOUS_MATHEMATICAL_SYMBOLS_ASUPPLEMENTAL_ARROWS_ABRAILLE_PATTERNSSUPPLEMENTAL_ARROWS_B$MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B#SUPPLEMENTAL_MATHEMATICAL_OPERATORS MISCELLANEOUS_SYMBOLS_AND_ARROWS
GLAGOLITICLATIN_EXTENDED_CCOPTICGEORGIAN_SUPPLEMENTTIFINAGHETHIOPIC_EXTENDEDCYRILLIC_EXTENDED_ASUPPLEMENTAL_PUNCTUATIONCJK_RADICALS_SUPPLEMENTKANGXI_RADICALS"IDEOGRAPHIC_DESCRIPTION_CHARACTERSCJK_SYMBOLS_AND_PUNCTUATIONHIRAGANAKATAKANABOPOMOFOHANGUL_COMPATIBILITY_JAMOKANBUNBOPOMOFO_EXTENDEDCJK_STROKESKATAKANA_PHONETIC_EXTENSIONSENCLOSED_CJK_LETTERS_AND_MONTHSCJK_COMPATIBILITY"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_AYIJING_HEXAGRAM_SYMBOLSCJK_UNIFIED_IDEOGRAPHSYI_SYLLABLESYI_RADICALSLISUVAICYRILLIC_EXTENDED_BBAMUMMODIFIER_TONE_LETTERSLATIN_EXTENDED_DSYLOTI_NAGRICOMMON_INDIC_NUMBER_FORMSPHAGS_PA
SAURASHTRADEVANAGARI_EXTENDEDKAYAH_LIREJANGHANGUL_JAMO_EXTENDED_AJAVANESECHAMMYANMAR_EXTENDED_ATAI_VIETETHIOPIC_EXTENDED_AMEETEI_MAYEKHANGUL_SYLLABLESHANGUL_JAMO_EXTENDED_BHIGH_SURROGATESHIGH_PRIVATE_USE_SURROGATESLOW_SURROGATESPRIVATE_USE_AREACJK_COMPATIBILITY_IDEOGRAPHSALPHABETIC_PRESENTATION_FORMSARABIC_PRESENTATION_FORMS_AVARIATION_SELECTORSVERTICAL_FORMSCOMBINING_HALF_MARKSCJK_COMPATIBILITY_FORMSSMALL_FORM_VARIANTSARABIC_PRESENTATION_FORMS_BHALFWIDTH_AND_FULLWIDTH_FORMSSPECIALSLINEAR_B_SYLLABARYLINEAR_B_IDEOGRAMSAEGEAN_NUMBERSANCIENT_GREEK_NUMBERSANCIENT_SYMBOLSPHAISTOS_DISCLYCIANCARIAN
OLD_ITALICGOTHICUGARITICOLD_PERSIANDESERETSHAVIANOSMANYACYPRIOT_SYLLABARYIMPERIAL_ARAMAIC
PHOENICIANLYDIAN
KHAROSHTHIOLD_SOUTH_ARABIANAVESTANINSCRIPTIONAL_PARTHIANINSCRIPTIONAL_PAHLAVI
OLD_TURKICRUMI_NUMERAL_SYMBOLSBRAHMIKAITHI	CUNEIFORM!CUNEIFORM_NUMBERS_AND_PUNCTUATIONEGYPTIAN_HIEROGLYPHSBAMUM_SUPPLEMENTKANA_SUPPLEMENTBYZANTINE_MUSICAL_SYMBOLSMUSICAL_SYMBOLSANCIENT_GREEK_MUSICAL_NOTATIONTAI_XUAN_JING_SYMBOLSCOUNTING_ROD_NUMERALS!MATHEMATICAL_ALPHANUMERIC_SYMBOLSMAHJONG_TILESDOMINO_TILESPLAYING_CARDS ENCLOSED_ALPHANUMERIC_SUPPLEMENTENCLOSED_IDEOGRAPHIC_SUPPLEMENT%MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS	EMOTICONSTRANSPORT_AND_MAP_SYMBOLSALCHEMICAL_SYMBOLS"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C"CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D'CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENTTAGSVARIATION_SELECTORS_SUPPLEMENT SUPPLEMENTARY_PRIVATE_USE_AREA_A SUPPLEMENTARY_PRIVATE_USE_AREA_Br   r_   r4   r`   r5   r3   r{   r{      s9       44GK +ffg66K$VVW=="{667;;"{667;; [99N*{667CC"-+ffg"F"F"{667;;{66733H%+ffg>>{66733H[11F[11F[11F#FFG<<[11F
+ffg
.
.CFFG44Ik&&'22GVVW55Jk&&'22G{66733H{66733HK00EK00E[11Fk&&'22GFFG44Ik&&'22G;vvw//D
+ffg
.
.Ck&&'22Gk&&'22G{66733H+ffg66K{66733H%+ffg>>{66733H,7K,P,P)K00EK00Ek&&'22Gk&&'22GK00E{66733HK00EFFG44I5@[QX5Y5Y2K00E[11F+ffg66KK88M{66733H{66733H{66733HFFG44IK00E[11F{66733H"{667;;%+ffg>>%0[%I%I"-8[-Q-Q* +FFG D D [99N%+ffg>>"-+ffg"F"F"{667;;.9k&&'.R.R+$VVW==;vvw77L[11F([AA)k&&'BB"{667;;$/K$H$H!([AA+ffg66K [99N"{667;;'K@@{66733H+6;vvw+O+O('K@@"{667;;'K@@+6;vvw+O+O(*5+ffg*N*N''2{667'K'K$VVW55J"{667;;[11F%+ffg>>{66733H#FFG<<%+ffg>>*{667CC)k&&'BB!k&&'::O)4VVW)M)M&"-+ffg"F"F{66733H{66733H{66733H +FFG D D[11F#FFG<<+ffg66K#.;vvw#G#G &1k&&'&J&J##FFG<<)4VVW)M)M&)k&&'BB([AA;vvw77L+ffg66K;vvw//D
+ffg
.
.C%+ffg>>K00E'K@@"{667;;;vvw77L +FFG D D{66733HVVW55J%+ffg>>{66733H[11F([AA{66733H;vvw//D$VVW=={66733H%+ffg>>;vvw77L"{667;;([AA!k&&'::O"-+ffg"F"F [99N"{667;;#.;vvw#G#G $/K$H$H!"-+ffg"F"F%+ffg>> [99N&;vvw??)k&&'BB%+ffg>>"-+ffg"F"F$/K$H$H!{66733H$Wgw??$Wgw?? ['7;;N'K'BB!k'7G<<OK'::M['733F['733FWgw77J['733F{7GW55H+gw88Kk'7G44Gk'7G44Gk'7G44G#GWg>>"{7GW==Wgw77J['733FWgw77J#GWg>>k'7G44G(['7CC'K'BBWgw77J&;wAA['733F['733FGWg66I(3GWg(N(N%&;wAA"{7GW==!k'7G<<O +GWg F F!k'7G<<O%0['7%K%K"'K'BB'K'BB(3GWg(N(N%K'::M;w99LK'::M'2{7GW'M'M$&1k'7G&L&L#,7K',R,R)GWg66I +GWg F F$Wgw??)4Wgw)O)O&)4Wgw)O)O&)4Wgw)O)O&.9k'7G.T.T+;w11D%0['7%K%K"'2{7GW'M'M$'2{8Xw'O'O$k"b!!G! ! [! ! !r5   r{   c                 v    t          |           } |t          j        | |          S t          j        |           S )zReturns the digit value assigned to the Unicode character uni_char as
    integer. If no such value is defined, default is returned, or, if not
    given, ValueError is raised.)r   r/   digit)r2   default_values     r3   r_  r_    s;     hH  =999 ***r5   __main__rf   )r9   r   r/   utilr   r   r   objectr   rb   rd   r{   r_  r6   doctesttestmodr`   r5   r3   <module>rf     s.  I IT      ) ) ) ) ) ) ) ) ) ).6 .6 .6 .6 .6v .6 .6 .6b/ / /D D D D D, D D D.k! k! k! k! k!F k! k! k!\+ + + + zNNNGO r5   