
    -i5                        d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(  ej)        dej*                  Z+ ej)        dee	dede
ede
d	e
ed
e
eded	ede
edej*                  Z, ej)        d-                    dde
de"e$e!ded	ddddddg          ej*                  Z.ddZ/d Z0d Z1dS )z*
Basic routines to validate verbs
ar_verb
    N)"FATHASHADDAHAMZAALEFNOONALEF_HAMZA_ABOVEALEF_HAMZA_BELOW
ALEF_MADDAALEF_MAKSURABEHDADDALDAMMATANFATHATANFEHGHAINHAHHEHJEEMKAFKASRATANKHAHLAMREHSADSHEENTAHTEHTEH_MARBUTATHALTHEHYEHZAHZAINuD   ^است...|ا..ن..|ا..و..|ا..ا.ّ|ا....ّ|ا.ّ.ّ.|ا.ّا..$^([][^]{2}.|[^][^].|[^]{2}]|[^]{4})$|	   ^ا...ّ$^[..$u   ^ا[تذط]ّ[^اّ][^اّ]$	   ^ان...$u/   ^(ازد|اصط|اضط)..$^ا[^صضطظد]ت..$	   ^ا.ّ..$	   ^ا...ى$Tc                 ~   t          j        |           sdS |rt          j        |           }n| }|                    t          t
          t          z             }t          |          }|dk     s|dk    rdS |dk    r5|d         t          k    s"|d         t          k    s|d         t          k    rdS |dk    r|d         t          t          fvrdS |dk    r|d         t          k    rdS t          j        dt          t          t          t          t           d	|           rdS t          j        d
t          t"          t          d	t          dt          dt          dt          dt          dt          dt          dt          t          dt          dt"          t$          d|          rdS t          j        dt$          dt&          t(          t*          t,          t.          t0          t2          t4          t6          t8          t:          t<          t>          t@          t$          d	|          rdS t          j        dtB          t.          t6          t8          t:          d	t          |          rdS t          j        tD          tF          dtF          tD          dtH          tJ          dtJ          tH          dtL          tD          dt@          t*          dt*          t@          |          rdS |dk    rb|'                    t                    rH|d         t          t&          tB          t.          t0          t2          t4          t6          t8          t:          f
v rdS |d         |d         k    r| d         t          k    rdS |dk    r*t          j(        dt          dt          d|          rdS dS |dk    rt          j(        dtR          t
          dt          dt          t          d	t          dt          t          dt          t          dt          dt          dt          t          d|          rdS dS |dk    r|'                    t                    rt          j(        d|          rdS t          j(        dt          dt          t.          t8          d	t          d	|          rdS t          j(        d |          rdS t          j(        d!|          rdS t          j(        d"|          rdS t          j(        d|          rdS t          j(        d#|          rdS t          j(        d$|          rdS dS |'                    t                    rdS dS |dk    rT|'                    t                    s|'                    t                    sdS tT          (                    |          rdS dS dS )%a|  
    Determine if the given word is a valid infinitive form of an arabic verb.
    A word is not valid  infinitive if
        - lenght < 3 letters.
        - starts with : ALEF_MAKSURA, WAW_HAMZA, YEH_HAMZA, HARAKAT
        - contains TEH_MARBUTA, Tanwin
        - contains non arabic letters.
        - contains ALEF_MAKSURA not in the end.
        - contains double haraka : a warning
    @param word: given word.
    @type word: unicode.
    @param is_vocalized: if the given word is vocalized.
    @type is_vocalized:Boolean, default(True).
    @return: True if the word is a valid infinitive form of verb.
    @rtype: Boolean.
    F      r            r1   r(   z([z|^z..z|^.r.   .z]$)r0   z^[^r&   z].$T   r%   r'   r)   r*   r+   r,   r-   r/   r2   r3   u   ^(ازد|اصط|اضط)..$u   ^ا[^صضطظد]ت..$r4   r5   )+arabyis_arabicwordstrip_harakatreplacer
   r   r   lenr   r   researchr	   r   r   r   r   r   r"   r!   r   r   r   r    r$   r   r   r   r   r#   r   r   r   r   r   r   r   r   r   
startswithmatchr   VALID_INFINITIVE_VERB6_PATTERN)word	vocalizedword_nmlengths       R/var/www/html/speakWrite/venv/lib/python3.11/site-packages/libqutrub/verb_valid.pyis_valid_infinitive_verbrM   E   s   $ %% u '-- ooj%*55G\\F zzfkku 
1'!*,,
f0D0DqzVu 
1C;66u	1,,u 
$4$4kkHhh"#'
) 
) pu 
D,,ffffffDD$$$dddDDD,,56=
? 
? f u 
CCttSS$$ddEE33CeeSS##sss$%,
. 
. \ u 
cc44cc333Dg	N	N Uu 
Ccc333SSS##sssDDSS##sssCC&
( 
( Q u
 
A',,S11gajsD$sCcE; 7; 7;u	wqz	!	!d1g#oou 
188TTT6662G<< 	4 U	188

EEE66644ttVVV	vvvvvvvvvttVVV	56=? ? 	
 4 U	1t$$ 	xg.. txxTTT33ccc666J  t ,00 t7AA t2G<< 	t,00 t,00 t,00 t$$ 	45 
1""4(( 	G,>,>s,C,C 	5)//88 	4
 U4    c                 $   g }t          j        |           } t          j        dt          t
          t          t          dd|           } t          |           r|	                    |            |S | 
                    t                    rHt          j        t          t          |           } t          |           r|	                    |            |S n=| 
                    t                    rQt          j        t          t          t          z   | d          }t          |          r|	                    |           |S nt          |           dk    rt          j        t          t          | d          } | t           z   }t          |          r|	                    |           | t"          z   }t          |          r|	                    |           | t          z   }t          |          r|	                    |           | d         t          z   | d         z   }t          |          r|	                    |           |S t          |           dk    r]t%          t          |           dz
            D ];}t          | ||dz            z   }t          |          r|	                    |           <net          |           dk    r\t%          t          |           dz
            D ];}t          | ||d	z            z   }t          |          r|	                    |           <nt          |           d	k    r| d         t          k    s| d         t           k    rG| d         | d         z   | d         z   | d
         z   }t          |          r|	                    |           |                     t                     rG| d         | d         z   | d
         z   | d         z   }t          |          r|	                    |           |S |	                    d           |S |S )z
    Generate a list of valid infinitive verb for an invalid infinitive form.
    @param verb: given verb, of invalid infinitive form.
    @type verb: unicode.
    @return: a list of suggested infinitive verb forms
    @rtype: list of unicode.
    r1   r(    r9      r   r;   r:   r=   r7   u   كتب)r>   r@   rC   subr   r   r   r   rM   appendrE   r	   r   r   r   rB   r   r   rangeendswith)verblist_suggestverb_oneis       rL   suggest_verbrZ      s    Lt$$D66++xx888L D  %% QD!!! 
)	*	* Kv&d33#D)) 	 %%%	  
		 C6$ 0 6a@@#H-- 	 )))	  
Tavd,dA66;#H-- 	*)))$#H-- 	*)))9#H-- 	*)))74<Q'#H-- 	*)))	Ta s4yy{## 	. 	.AD1Q3K'H'11 .##H---	. 
Ta s4yy{## 	. 	.AD1Q3K'H'11 .##H---	. 
Ta 7d??d1g//AwtAwtAw.tAw6H'11 .##H---==   	.
 AwtAwtAw.tAw6H'11 .##H--- 	I&&&rN   c                    |                      t                    rt          j        |           }t	          |          dk    r(|                    t          t          t          z             S t	          |          dk    r_t          j	        
                    |          rt          j	        |         d         S |                    t          t          t          z             S |                    t          t          t          z             S |S )z
    Convert Alef madda into two letters.
    @param word: given word.
    @type word: unicode.
    @return: converted word.
    @rtype: unicode.
    rQ   r7   r   )rE   r
   r>   r@   rB   rA   r   r   vconstALEF_MADDA_VERB_TABLEhas_key)rH   rJ   s     rL   normalize_alef_maddar_   O  s     z"" %d++w<<1??:uTz:::\\Q+33G<< @ 3G<Q??
E$J?????:uTz:::rN   )T)2__doc__rC   libqutrub.verb_const
verb_constr\   pyarabic.arabyr>   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   compileUNICODErG   VALID_INFINITIVE_VERB4_PATTERNjoinVALID_INFINITIVE_VERB5_PATTERNrM   rZ   r_    rN   rL   <module>rj      s  ,  
			 & % % % % %      ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) 
RTVT^ _ _  ",EEE66644ttVVVTTvvv'(*
"4 "4 
 ",TYY$0L!%ssDD###vvv>*%8 . . *" " (_ _ _ _Dc c cP    rN   