
    -i:                        d Z ddlmZmZmZmZ ddlZddlZddlm	Z
 i de
j        de
j        de
j        de
j        de
j        d	e
j        d
e
j        de
j        de
j        de
j        de
j        de
j        de
j        de
j        de
j        de
j        de
j        i de
j        de
j        de
j        de
j        de
j         de
j!        de
j"        de
j#        de
j$        de
j%        de
j&        d e
j'        d!e
j(        d"e
j)        d#e
j*        d$e
j+        d%e
j,        e
j-        e
j.        e
j/        e
j0        e
j1        e
j2        e
j3        e
j4        e
j5        e
j6        e
j7        e
j8        e
j        d&Z9i dd'ddddddddd	dd
ddd(dddddddddddddd)dd*dd+i dd,dd-dd.ddddddddddddddddd d	d!d/d"d0d#d0d$d0d%d0d1d0d2d3d4d5d6d7d8e
j6        d2d'd2d&Z:i d9d:d;dd<dd=d:d>d:d?d:d@ddAddBddCddDdEdFd	dGd
dHdIdJddKdLdMdi dNddOddPdQdRddSddTddUdVdWddXddYd2dZdd[dd\dd]dd^dd_dd`ddd3d d4d8d6d3d7d5d2d2d3daZ;db e9<                                D             Z=de=e
j        <   ej>        dck    r5e?@                    e
jA        e
jB        z   dd          ZCe?@                    e
jA        e
jB        z   de          ZDe?@                    dde
jA        e
jB        z             ZEe?@                    dee
jA        e
jB        z             ZFe?@                    d2G                    e
jH                  d2G                    e
jI                            ZJe?@                    d2G                    e
jH                  d2G                    e
jK                            ZLe?@                    d2G                    e
jI                  d2G                    e
jH                            ZMe?@                    d2G                    e
jI                  d2G                    e
jK                            ZNe?@                    d2G                    e
jK                  d2G                    e
jH                            ZOe?@                    d2G                    e
jK                  d2G                    e
jI                            ZPdf ZQnOdg  eRe
jA        e
jB        z   dd          D             ZCdh  eRe
jA        e
jB        z   de          D             ZDdi  eRdde
jA        e
jB        z             D             ZEdj  eRdee
jA        e
jB        z             D             ZFdk  eRe
jH        e
jI                  D             ZJdl  eRe
jH        e
jK                  D             ZLdm  eRe
jI        e
jH                  D             ZMdn  eRe
jI        e
jK                  D             ZNdo  eRe
jK        e
jH                  D             ZOdp  eRe
jK        e
jI                  D             ZPdq ZQdr ZSds ZTdduZUdv ZVdw ZWdx ZXdy ZYdd{ZZdd~Z[ddZ\ddZ]e^dk    rzd_                    d          Z`e`D ]Za eSea          Zb eTeb          Zc eVea          Zd eXeadd          Ze eXeedd          Zf eXeadd          Zg ehdG                    eaebecedeeefeg e?eceak               e?eeebk               e?efeck               e?egedk              g          i                    d                     d e9<                                D             Zj ehej           ddlkmlZl dZm eh el eYem                                eZemdd          Zn eheni                    d                      eZemdd          Zn eheni                    d                     dS dS )z
Arabic Transliteration routins
@author: Taha Zerrouki
@contact: taha dot zerrouki at gmail dot com
@license: GPL
@date:2018/08/146
@version: 0.1
    )absolute_importprint_functionunicode_literalsdivisionNAbtpvjHxd*rzs$SDTZEgfqklmnhwyY'&><)|}_aFiKuN~o`{za:Xzs'zd'zt'zD'z?'G:?z?a: r,   anr.   inr0   unu   ء2u   آu   أu   ؤu   إu   ئu   اu   بu   ةu   تu   ثthu   جu   حu   خkhu   دu   ذdhu   رu   زu   سu   شshu   صu   ضu   طu   ظzhu   عu   غu   ـu   فu   قu   كu   لu   مu   نu   ه)u   وu   ىu   يu   ًu   ٌu   ٍu   َu   ُu   ِu   ّu   ْu   ٰc                     i | ]\  }}||	S  rE   .0r   r   s      L/var/www/html/speakWrite/venv/lib/python3.11/site-packages/pyarabic/trans.py
<dictcomp>rI      s    000daQ000    )   r   	012345678	0AUIauio3c                 ,    |                      |          S )# translate a word accoring to table)	translate)wordtables     rH   rP   rP      s    ~~e$$$rJ   c                     i | ]\  }}||	S rE   rE   rG   cr	   s      rH   rI   rI          ZZZ1!ZZZrJ   c                     i | ]\  }}||	S rE   rE   rT   s      rH   rI   rI      rV   rJ   c                     i | ]\  }}||	S rE   rE   rT   s      rH   rI   rI          \\\1!\\\rJ   c                     i | ]\  }}||	S rE   rE   rT   s      rH   rI   rI      rY   rJ   c                     i | ]\  }}||	S rE   rE   rT   s      rH   rI   rI          GGGA1GGGrJ   c                     i | ]\  }}||	S rE   rE   rT   s      rH   rI   rI      r\   rJ   c                     i | ]\  }}||	S rE   rE   rT   s      rH   rI   rI      r\   rJ   c                     i | ]\  }}||	S rE   rE   rT   s      rH   rI   rI      r\   rJ   c                     i | ]\  }}||	S rE   rE   rT   s      rH   rI   rI      r\   rJ   c                     i | ]\  }}||	S rE   rE   rT   s      rH   rI   rI      r\   rJ   c                 F    d}| D ]}||                     ||          z  }|S )rO   r:   )get)rQ   rR   mystrmychars       rH   rP   rP      s5     	0 	0FUYYvv...EErJ   c                 P    d}| D ] }|t                               ||          z  }!|S )z.Tranliteration to UTF-8 conversion of a stringr:   )	t2a_tablerc   r   rd   re   s      rH   tim2utf8ri      5    E 0 0vv...LrJ   c                 P    d}| D ] }|t                               ||          z  }!|S )z7Tranliteration to Tim Buckwalter conversion of a stringr:   )	a2t_tablerc   rh   s      rH   utf82timrm      rj   rJ   r2   c                     | d         |k    r
| dd          } || v rB|                      |          }|dz
  dk    r |                     || |dz
           d          } || v B| S )Nr      )indexreplace)rQ   shaddar.   s      rH   convertShaddars      st    Aw!""X
D..::fQ366 fT!A#Yq11D D.. KrJ   c                     d}t          |           } | D ] }|t                              ||          z  }!t          j        dd|          }t          j        dd|          }|S )zTranliteration to SAMPA code phonemes conversion of a string
    We suppose that all words are full vocalized.
    We convert according to t2s table,
    and the shadda is converted to double letter
    r:   z(?<=u)wr8   z(?<=i)j)rs   t2sampa_tablerc   resubrh   s      rH   	tim2samparx      sp     E 	aA 4 4""66222 F9S''EF9S''ELrJ   c                 P    d}| D ] }|t                               ||          z  }!|S )z@Tranliteration from UTF-8  to latin with plain english no symbolr:   )
a2en_tablerc   rh   s      rH   
utf82latinr{   	  s5    E 1 1///LrJ   c                 N   |                                 }|                                 }|dv rJ|dv rt          |           S |dk    rt          t          |                     S |dv rt          |           S | S |dv r*|dv rt	          |           S |dk    rt          |           S | S dS )z5
    convert text from code_from to code_to
    
    )utfutf8arabic)tim
buckwaltersampa)latinasciiN)lowerrm   rx   r{   ri   )text	code_fromcode_tocode1code2s        rH   convertr     s    
 OOEMMOOE))))))D>>!gXd^^,,,(((d###K%%%---D>>!gT??"K &%rJ   c                    | s| S g }t          j        d| d                   rd}nd}d}| D ]}t          j        d|          r$|r||z  }|                    d|f           d}|};t          j        d|          r||z  }V|rt          |          }d}t          j        d|||dz                      s%|dz  }t          j        d|||dz                      %||dz   d	         }|d	|dz            }|                    d
|f           d}||z   }||z  }|r|                    d
|f           n|                    d|f           |S )z
    Detect language
    u   [؀-ۿ]r   TFr:   r   z[\s\d\?, :\!\(\)]ro   Nr   )rv   searchappendlen)r   
resultlistr   actual_textr   r.   	temp_texts          rH   segment_languager   +  s    J	y#T!W-- K ! !9'++ 	!  q !!7K"8999Y+Q// 	!1KK !$$	)$6AacE8JKK FA )$6AacE8JKK '!-	)$1Q3$/!!8["9:::'kq  28[122227K0111rJ   r   c                     g }t          |           }|D ]<\  }}||k    r|                    |||           '|                    |           =d                    |          S )N )r   r   join)r   languagestartendnew_chunks_listchunkslangchunks           rH   delimite_languager   T  s    Od##F * *u8##eeEE33$?@@@@""5))))99_%%%rJ   allwestc                    |                                 }|                                 }|dv sJ d|z              |dv sJ d|z              ||k    r| S t          t          dt          t          dt
          t          dd}|dk    r2||= |                                D ]}t          | ||                   } | S t          | ||         |                   S )	u  
    Normalize digits to and from the following writing systems:
    west:    Western Arabic numerals                (0123456789)
    east:    Eastern Arabic (Hindu-Arabic) numerals (٠١٢٣٤٥٦٧٨٩)
    persian: Persian/Urdu numerals                  (۰۱۲۳۴۵۶۷۸۹) 
    
    if `source = all`, then all digits contained in the text 
    will be normalized into `out` writing system.
    Otherwise digits written in `source` will be normalized
    without affecting the rest of the digits. 

    Example:
        >>> text = u'۰۱۲۳۴۵۶۷۸۹ ٠١٢٣٤٥٦٧٨٩ 123456789'
        >>> normalize_digits(text, source='all', out='west')
        '0123456789 0123456789 0123456789' 
        >>> normalize_digits(text, source='persian', out='west')
        >>> '0123456789 ٠١٢٣٤٥٦٧٨٩ 0123456789' 

    @param text: unnormalized text.
    @type text: unicode.
    @param source: Writing system for the digits to be normalized.
                   (default is all).
    @type source: string
    @param out: Intended writing system for source.
                (default is west)
    @return: returns a normalized text.
    @rtype: unicode.
    )r   r   eastpersianzInvalid option for `source`: %s)r   r   r   zInvalid option for `out`: %s)r   r   )r   r   )r   r   r   )	r   	W2E_TRANS	W2P_TRANS	E2W_TRANS	E2P_TRANS	P2W_TRANS	P2E_TRANSvaluesrP   )r   sourceoutsource_to_out_tbltbls        rH   normalize_digitsr   a  s    : \\^^F
))++C77779Z]c9c777----/MPS/S---}}"y99"y99%y99 
 c"$++-- 	- 	-CT3s8,,DDT,V4S9:::rJ   r   c                    t          j        |           \  }}|dk    rt          |t                    }n1|dk    rt          |t                    }nt          |t                    }|dk    r	 t          |          }n#  | dfcY S xY w||fS )uV  
    Encode word marks into decimal or Ascii string to be saved as integer
    
    Example:
        >>> import pyarabic.trans
        >>> word1 = u"هَارِبًا"
        >>> pyarabic.trans.encode_tashkeel(word1)
        ('هاربا', 'a0iA0')
        >>> pyarabic.trans.encode_tashkeel(word1, "decimal")
        ('هاربا', 40610)
        >>> letters = u"هاربا" 
        >>> encoded_marks = u"a0iA0"
        >>> pyarabic.trans.decode_tashkeel(letters, encoded_marks)
        'هَارِبًا'
        >>> letters = u"هاربا" 
        >>> encoded_marks = 40610
        >>> pyarabic.trans.decode_tashkeel(letters, encoded_marks, "decimal")
        'هَارِبًا'


    @input word: diacritized arabic diacritcs
    @type word: unicode
    @return:  (letters, encoded) zero if fails
    @rtype: (letters, encoded) ttring/ integer
    decimalr   r:   )arseparaterP   	T2D_TRANS	T2A_TRANSint)rQ   methodlettersmarkstranseds        rH   encode_tashkeelr     s    4 [&&NGUE9--	7		E9--E9-- 	'llGG	8OOOGs   ,A< <Bc                 z   	 t          |          t          k    rt          |          }|                    t          |           t          d                    }|dk    rt	          |t
                    }n1|dk    rt	          |t                    }nt	          |t                    }t          j        | |          }|S )z decode tashkeel0r   r   )	typestrrjustr   rP   	D2T_TRANS	A2T_TRANSr   joint)rQ   r   r   r   word2s        rH   decode_tashkeelr     s     E{{sE

KKD		#c((++EE9--	7		E9--E9--HT7##ELrJ   __main__zqulo
>aEuw*u
bi
rab~i
{l
n~aAsi
maliki
{l
n~aAsi
<ila`hi
{l
n~aAsi
min
$ar~i
{lo
wasowaAsi
{lo
xan~aAsi
{l~a*iY
yuwasowisu
fiY
Suduwri
{l
n~aAsi
mina
{lo
jin~api
wa
{l
n~aAsi
r   r}   r   	r~   c                     i | ]\  }}||	S rE   rE   rF   s      rH   rI   rI     s    888daQ888rJ   )areprub   السلام عليكم how are you, لم اسمع أخبارك منذ مدة, where are you goingz\RL{r*   )r   r   z<arabic>z	</arabic>)r2   )r   r(   r'   )r   r   )r   )o__doc__
__future__r   r   r   r   sysrv   pyarabic.arabyarabyr   ALEFBEHTEHTEH_MARBUTATHEHJEEMHAHKHAHDALTHALREHZAINSEENSHEENSADDADTAHZAHAINGHAINFEHQAFKAFLAMMEEMNOONHEHWAWYEHALEF_MAKSURAHAMZA	WAW_HAMZAALEF_HAMZA_ABOVEALEF_HAMZA_BELOW
ALEF_MADDA	YEH_HAMZATATWEELFATHAFATHATANKASRAKASRATANDAMMADAMMATANSHADDASUKUN	MINI_ALEFrg   ru   rz   itemsrl   version_infor   	maketransNOT_DEF_HARAKATASHKEEL_STRINGr   r   r   r   r   NUMBERS_EASTNUMBERS_WESTr   NUMBERS_PERSr   r   r   r   r   rP   zipri   rm   rs   rx   r{   r   r   r   r   r   r   __name__splitwordsrQ   r   timur   arabic2timu2sampa2printencodeutf2tim_tablearabreprr   r   text_outrE   rJ   rH   <module>r     s               


 				      1 RW1 RV1 RV1 R^	1
 RW1 RW1 RV1 RW1 RV1 RW1 RV1 RW1 RW1 RX1 RV1  RV!1" RV#1 1$ RV%1& RV'1( RX)1* RV+1, RV-1. RV/10 RV112 RW314 RW516 RV718 RV91: RV;1< R_=1> bh?1@ R\A1B RC1D RE1 1F ]\ZX[X[X[YX\Wa1 1 1	n0 T0 S0 S0 S	0
 S0 S0 S0 S0 S0 S0 S0 S0 S0 S0 T0  T!0" T#0 0$ T%0& T'0( S)0* S+0, S-0. S/00 S102 S304 S506 S708 S90: S;0< c=0> c?0@ SA0B SC0D SE0 0F 			Y	_0 0 0f-T3 -s-s- s- s	-
 s- s- s- s- s- t- s- s- t- s- t-  s!- -" s#-$ s%-& t'-( s)-* s+-, s--. t/-0 s1-2 s3-4 r5-6 s7-8 s9-: s;-< s=-> s?-@ sA-B sC- -D 




		
[- - -
^ 10ioo//000		"' v}}R.1CC[QQI}}R.1CC[QQI}}[B,=@R,RSSI}}[B,=@R,RSSIbggbo668P8PQQIbggbo668P8PQQIbggbo668P8PQQIbggbo668P8PQQIbggbo668P8PQQIbggbo668P8PQQI% % % % [ZB$58J$JL Y YZZZIZZB$58J$JL Y YZZZI\\LB4EHZ4Z [ [\\\I\\LB4EHZ4Z [ [\\\IGGR_bo!F!FGGGIGGR_bo!F!FGGGIGGR_bo!F!FGGGIGGR_bo!F!FGGGIGGR_bo!F!FGGGIGGR_bo!F!FGGGI      
 
 
 
  (    4' ' 'R& & & &-; -; -; -;^) ) ) )Z   0 z
: %++; 
<  n n$&!!4'$e,,''5//'$g..ejj$egufssSWY]S]`c`cdkmsds`t`tvyvyz  BF  {F  wG  wG  IL  IL  MS  UZ  MZ  I[  I[  ]  ^  ^  e  e  fl  m  m  	n  	n  	n  	n 98ioo&7&7888M	E-
sD	E%%  &&
'
'(((F<<<H	E(//&
!
!"""JKHHHH	E(//&
!
!"""""i rJ   