
    -i                     $   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d Zd Zd Zd Z d	 Z!d
 Z"d Z#d Z$d Z%d Z&d Z'd Z(ddZ)efdZ*d Z+d Z,d Z-d Z.d Z/d Z0d Z1d Z2d Z3d Z4dS )z'
Basic routines to treat verbs
ar_verb
    )print_functionN)FATHADAMMAKASRASHADDASUKUNHAMZAALEFWAWALEF_HAMZA_ABOVE
ALEF_MADDA	YEH_HAMZAALEF_MAKSURAYEHTEHLAM_ALEFSIMPLE_LAM_ALEFLAM_ALEF_HAMZA_ABOVESIMPLE_LAM_ALEF_HAMZA_ABOVELAM_ALEF_MADDA_ABOVESIMPLE_LAM_ALEF_MADDA_ABOVEc                 8    | d|         |z   | |dz   d         z   S )a  
    Replace a letter in string in position
    @param word: given string
    @type word: unicode
    @param rep: replecment letter
    @type rep: unicode char
    @param pos: replemcment position
    @type pos: int
    @return: modified string
    @rtype: unicode string
    r      N )wordrepposs      O/var/www/html/speakWrite/venv/lib/python3.11/site-packages/libqutrub/ar_verb.pyreplace_posr   ,   s'     #;s?4A<''    c                 "   d}| dk     s| dk    rd}n}| dk    rt           t          f}nh| dk    rt           t          f}nS| dk    rt           t           f}n>| dk    rt          t           f}n)| dk    rt          t          f}n| dk    rt          t          f}|S )a  
    Get the   the past and future marks by the bab sarf number
        - Bab: past  future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param number: Bab sarf number (1-6).
    @type number: integer(1-6)
    @return:  a tuple of (past_mark, future_mark)
    @rtype: tuple
    Nr                  r   r   r   )numberbabs     r   get_bab_sarf_harakatr*   =   s     CzzVaZZ	1en	1en	1en	1en	1en	1enJr    c                 4   d}| t           k    r|t          k    rd}n|| t           k    r|t          k    rd}nc| t           k    r|t           k    rd}nJ| t          k    r|t           k    rd}n1| t          k    r|t          k    rd}n| t          k    r|t          k    rd}|S )a  
    Get the bab sarf number by the past and future marks
        - Bab: past  future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param past_haraka: past haraka of the verb.
    @type past_haraka: unicode
    @param future_haraka: future haraka of the verb.
    @type future_haraka: unicode
    @return: Bab sarf number (1-6)
    @rtype: integer
    r   r   r#   r$   r%   r&   r"   r'   )past_harakafuture_harakar)   s      r   get_bab_sarf_numberr.   ^   s    " Ce 6 6			-5"8"8			-5"8"8			-5"8"8			-5"8"8			-5"8"8Jr    c                    d}t           dt          dt          dt          dt          j        dt          j        dt          j        dt          j        d	t          j	        d
t          j
        di
}| D ]}||v r|d||         z   z  }|dz  }|S )z
    Write the harakat name in full  in arabic
    @param harakat: list of harakat chars.
    @type  harakat: unicode  string
    @return: harakat in full
    @rtype: unicode
        فتحة   ضمة   كسرة   سكونu   ألفu   واوu   ياءu   ىu   وu   ئ-*)r   r   r   r   vconstALEF_HARAKA
WAW_HARAKA
YEH_HARAKAALEF_YEH_HARAKAALEF_WAW_HARAKAALEF_YEH_ALTERNATIVE)harakatfulltab_harakathrks       r   write_harakat_in_fullrB   ~   s     D	+	)	+	+
y
i
i
5
5
K   +DS)))DDDLDDKr    c                     t           dz  }| dv rt          dz  }nY| dv r(d                    t          t           t          g          }n-| dk    r'd                    t          t          t          g          }|S )a  
    Get the past harakat for the trileteral verb by bab sarf
        - Bab: past  future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param vtype: the bab sarf codification.
    @type vtype: unicode a string of number
    @return: harakat
    @rtype: unicode
    r$   12346r0   5)r   r   joinr   vtypemarkss     r   get_past_harakat_by_babsarfrP      sn     !GEa	*		%.//	#%.//Lr    c                     t           t           z   t           z   }| dv rt          t          z   t          z   }n9| dv rt          t           z   t          z   }n| dk    rt          t          z   t          z   }|S )a  
    Get the future harakat for the trileteral verb by bab sarf
        - Bab: past  future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param vtype: the bab sarf codification.
    @type vtype: unicode a string of number
    @return: harakat
    @rtype: unicode
    rD   rH   rK   )r   r   r   rM   s     r   get_future_harakat_by_babsarfrR      sd      %KEeE!	*		eE!	#eE!Lr    c                 b    | dk    rt           S | dv rt          S | dv rt          S | dv rt           S dS )u4  
    Get the future_type value from  different codifications. 
    used also in comand line
    in différent context the future_type is codified as:
    values
    or values used as Conjugation mode ( Bab Tasrif باب التصريف)
        - Bab: past  future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param vtype: the bab sarf codification.
    @type vtype: unicode a string of number
    @return: haraka
    @rtype: unicode char
    rE   )rF   rJ   )rG   rI   )rE   rK   r0   )r   r   r   )rN   s    r   get_future_haraka_by_babsarfrT      sG    ( ||	*			*			*		rr    c                     t          j        |           r| S | dk    rt          S | dk    rt          S | dk    rt          S | dk    rt
          S dS )u(  
    Convert an arabic named harakat to a real haraka
    values
        - Fahta:(فتحة)
        - DAMMA:(ضمة)
        - KASRA:(كسرة)
    @param haraka_name: the arabic name of haraka.
    @type haraka_name: unicode
    @return: the arabic name of haraka .
    @rtype: unicode char
    r1   r2   r3   r4   F)arabyis_shortharakar   r   r   r   )haraka_names    r   get_haraka_by_namerY      sb     K(( k!!			!	!		#	#		#	#ur    c                 6    t          |           }|r|S t          S )u  
    Get the future_type value by haraka arabic name.
    values
        - FATHA:(فتحة)
        - DAMMA:(ضمة)
        - KASRA:(كسرة)
    @param haraka_name: the arabic name of haraka.
    @type haraka_name: unicode
    @return: haraka
    @rtype: unicode char
    )rY   r   )rX   harakas     r   get_future_type_by_namer\     s#      ,,F r    c                     d|                                  z   } | dv rt          S | dv rt          S | dv rt          S t          S )u  
    Get the future_type value from  different codifications.
     used also in comand line
    in différent context the future_type is codified as:
    values
        - Fahta:(fatha, فتحة, ف, f)
        - DAMMA:(damma, ضمة, ض, d)
        - KASRA:(kasra, كسرة, ك, k)
    or values used as Conjugation mode ( Bab Tasrif باب التصريف)
        - Bab: past  future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param future_type: the future_type codification.
    @type future_type: unicode
    @return: extract the future type mark
    @rtype: unicode char
    r0   )fathar1   u   فfrG   rI   )dammar2   u   ضdrE   rK   )kasrar3      كkrF   rJ   )lowerr   r   r   )future_types    r   get_future_type_entreerg     sR    , k'')))KFFFDDDFFF,r    c                 
    | dv S )ug  
    Get the transitive value from  different codifications.
    in différent context the transitivity is codified as:
        - "t", "transitive", 
        - u"متعدي", u"م", u"مشترك", u"ك"
        - True
    @param transitive: the transitive codification.
    @type transitive: unicode
    @return: True if is transitive
    @rtype: boolean
    )u
   متعديu   مu
   مشتركrc   t
transitiveTr   )rj   s    r   get_transitive_entreerk   8  s      & & &r    c                    |                      t                    rt          j        |           }t	          |          dk    r(|                    t          t          t          z             S t	          |          dk    rN|t          j	        v rt          j	        |         d         S |                    t          t          t          z             S |                    t          t          t          z             S |S )z
    Convert Alef madda into two letters.
    @param word: given word.
    @type word: unicode.
    @return: converted word.
    @rtype: unicode.
    r#   r$   r   )

startswithr   rV   strip_harakatlenreplacer	   r
   r7   ALEF_MADDA_VERB_TABLE)r   word_nms     r   normalize_alef_maddars   J  s     z"" %d++w<<1??:uTz:::\\Q&666 3G<Q??
E$J?????:uTz:::r    affixc                 4   d}|dk    rt          j        |           } |                     t                    rt	          |           } t          |           }||k     r?t          j        | |                   r%|dz  }||k     rt          j        | |                   %| |d         } t          j        |           } |                     t          t                    } |                     t          t                    } |                     t          t                    } | S )z
    Normalize the word, by unifoming hamzat, Alef madda, shadda, and lamalefs.
    @param word: given word.
    @type word: unicode.
    @param type: if the word is an affix
    @type type: unicode.
    @return: converted word.
    @rtype: unicode.
    r   rt   r   N)rV   strip_tatweelrm   r   rs   ro   rW   normalize_hamzarp   r   r   r   r   r   r   )r   wordtypeilen_words       r   	normalizer{   e  s    2 	
A 7"4((z"" *#D)) 4yyH
h,,5/Q88,	Q h,,5/Q88,8D &&D<</22D<<,.IJJD<<,.IJJDKr    c                 j   t          |           dk    r| S | t          |           dz
           t          j        k    r[|t          k    r#| dd         t          j        z   | dd         z   } n-|t
          k    r"| dd         t          j        z   | dd         z   } t          |          dk    r+|dd         t          k    r|dd         t          j        z   }n^t          |          dk    r&|dd         t          k    r|dd         t          z   }n%|dd         t          k    r|dd         t          z   }| S )al  
    Convert toi its origin according to the future type haraka
    @param marks: given marks.
    @type marks: unicode.
    @param word_nm: given word unvocalized.
    @type word_nm: unicode.
    @param future_type: The future mark of the triletiral verb.
    @type future_type: unicode char, default KASRA.
    @return: converted marks.
    @rtype: unicode.
    r#   Nr$   )ro   r7   r8   r   r;   r   r<   r
   ALEF_MAMDUDAr   r   )rO   rr   rf   s      r   uniformate_alef_originr     s*    5zzQ	s5zz!|	 2	2	2%#2#Jv55eBCCj@EEE!!#2#Jv55eBCCj@E
7||qWRSS\T11#2#,v22	WaGBCCLD00#2#,s"		%	%#2#,|+Lr    c                 V    |                      t          t          t          z             } | S )z
    Replace shadda by SUKUN +SHADDA
    @param word: given word.
    @type word: unicode.
    @return: converted word.
    @rtype: unicode.
    )rp   r   r   )r   s    r   normalize_affixr     s      <<f--DKr    c                    |                      t          t          t          z             } d}d}d}t          |           }||k     r{t	          j        | |                   sT|| |         z  }|dz   t          |           k     rt	          j        | |dz                      r| |dz            t          k    rg|dz   t          |           k     r@| |dz            t          k    r,|dz   t          |           k     r|t          j	        z  }|dz  }n|t          z  }|dz  }n| |dz            t          k    r|dz   t          |           k     rn| |dz            t          k    rZ|dz   t          |           k    st	          j        | |dz                      s|t          j        z  }|dz  }n|t          z  }|dz  }n| |dz            t          k    r|dz   t          |           k     rl| |dz            t          k    rX|dz   t          |           k    st	          j        | |dz                      s|t          j        z  }|dz  }n|t          z  }|dz  }np|| |dz            z  }|dz  }n\|dz   t          |           k     r,t	          j        | |dz                      r|| |dz            z  }n|t          j        z  }|dz  }n|dz  }||k     {t          |          t          |          k    r||fS dS )z separate the harakat and the letters of the given word, 
    it return two strings ( the word without harakat and the harakat).
    If the weaked letters are reprsented as long harakat and striped 
    from the word.
    r0   r   r   r#   r$   r0   r0   )rp   r   r   ro   rV   rW   r   r
   r7   r8   r   r   r9   r   r   r:   	is_harakaNOT_DEF_HARAKA)r   shaklrr   ry   rz   s        r   uniformate_suffixr     s    <<f--DEG	A4yyH
h,,#DG,, $	tAwGsSYY5#7QqS	#B#B!9%%sSYY4!9+<+<!"1s4yy!33QQ!A#Y%''AaC#d))OOAaCyC''sc$ii''u/CD1I/N/N'!22QQ!A#Y%''AaC#d))OOacc!!sc$ii''u/CD1I/N/N'!22QQT!A#Y&EFAAA#D		//eod1Q3i&@&@/ac"..Q1faK h,,L 7||s5zz!!r    c                 h   | dk    rdS |                      t                    rt          |           } n(|                     t          t          t
          z             } t          j        |           }t          |          }t          |          dk    r%t          j
                            t          |          }|dk    r|d         t
          t          fv s|d         t          t          t
          fv rt          t          z   t          z   }n=| d         t          k    s| d         t           t          fv rt          t"          z   t          z   }nd}t          j        | |                   r|dz  }t          j        | |                   t          j        | |                   s|dz  }t          j        | |                   r|dz  }t          j        | |                   t          j        | |                   s|dz  }t          j        | |                   st          }n| |         }d                    t          |t          g          }t          j
                            t          |          }nw|dk    rt          j        }nd|dk    r4|                     t*                    rt          j        }n7t          j        }n*|d	k    rt          j        }nt          t          |          z  }d}|d         }|d         }||dz
  k     rM||         t
          k    r|d
d         t          j        z   }n|||         z  }|||         z  }|dz  }||dz
  k     M||         t
          k    r?t          |          dk    r!|d         t           k    r|t          j        z  }n|t           z  }n|||         z  }|||         z  }||fS )aW  
    Separate the harakat and the letters of the given word, 
    it return two strings ( the word without harakat and the harakat).
    If the weaked letters are reprsented as long harakat 
    and striped from the word.
    @param word: given word.
    @type word: unicode.
    @return: (letters, harakat).
    @rtype: tuple of unicode.
    r0   r   r$   r   r#   r   r%   r&   r"   Nr~   )rm   r   rs   rp   r	   r
   rV   rn   ro   r7   HAMZAT_PATTERNsubr   r   r   r   r   r   rW   rL   UNIFORMATE_MARKS_4r   UNIFORMATE_MARKS_5TEHUNIFORMATE_MARKS_5UNIFORMATE_MARKS_6r8   r   )r   rr   lengthrO   ry   secondharakanew_wordnew_harakats           r   uniformate_verbr     s~    rzzxz"" 4#D))||Jd
33!$''G\\F
7||q '++E7;; {{1:/000	&6=	=	=%K%EE!W	!!T!Wi0@%@%@%K%EE A&tAw// Q &tAw//  'Q00 Q&tAw// Q &tAw//  'Q00 Q'Q00 '  %#AwHHe\59::E '++E7;;	1)	1c"" 	.0EE-EE	1)c'll"	AqzH(K
fQh,,1:%crc*6+==KK58#K
"H	Q fQh,, qzTw<<1s!2!2++HHOHHGAJ58Kk""r    c                 B   d}| d         }t          |           }||k     r| |         t          t          t          t          fvr|| |         z  }nt          j        | |dz
                     s|dz   |k    st          j        | |dz                      sx| |         t          k    r|t          t          z   z  }n_| |         t          k    r|t          t          z   z  }n;| |         t          k    r|t          t          z   z  }n|| |         z  }n|| |         z  }|dz  }||k     |S )u   
    Treat Harakat on the word before output.
    معالجة الحركات قبل الإخراج،
    @param word: given vocalized word.
    @type word: unicode.
    @return: <vocalized word with ajusted harakat.
    @rtype: unicode.
    r   r   )
ro   r
   r   r   r   rV   rW   r   r   r   )r   rd   r   rz   s       r   standard_harakatr   f  s    	
AAwH4yyH
h,,74c<888QHH 'QqS	22 $qSH__E$8ac$C$C_7d??d
*HH!W^^c	)HH!W^^c	)HHQ'HHDG#	Q' h,,( Or    c                    d}d}d}t          |           }|                     t                    dk     r| |fS ||k     r+|dk    rz|dz   |k     rp| |dz            t          k    r[||         t          t          t
          t          fv r8||dz
           r,||         t          k    rK||dz            t          k    r7|| |         z  }t          | | |         |dz             } |||         z  }|dz  }ns||         t          k    rJ||dz            t          k    r6|| |         z  }t          | | |         |dz             } |t          z  }|dz  }n|dz
  dk    r||dz
           t          k    r~|| |         t          z   z  }||         t          k    r.|dd         ||         z   t          j	        z   ||dz            z   }n |dd         t          z   t          j	        z   ||dz            z   }n|dz
  dk    r]||dz
           t          j
        t          j        t          j        fv r/|| |         t          z   z  }|t          j	        ||dz            z   z  }nn||         t          k    r/|| |         t          z   z  }|t          j	        ||dz            z   z  }n.|| |         t          z   z  }|t          j	        ||dz            z   z  }|dz  }n|dk    r|dz   |k     ry| |dz            | |         k    rd||         t          k    rS||dz            t          t          t
          fv r4|| |         t          z   z  }|t          j	        ||dz            z   z  }|dz  }n|| |         z  }|||         z  }|dz  }||k     +||fS )u   treat geminating cases
    المدخلات هي من كلمة غير مشكولة يقابلها حركاتها
    والحرف المضعف يمثل بشدة
    وإذا كانت الحالة تستوجب الفك، استبدلت الشدة بالحرف المضعف،
    أمّا إذا كانت لا تستوجب الفك، 
فتُعدّل حركة الحرف المضعف الأول إلى حركة ملغاة، 
تحذف في دالة الرسم الإملائي فيما بعد
    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: (letters, harakat).
    @rtype: tuple of unicode.
    r0   r   r   Nr~   r#   )ro   findr   r   r   r   r   r   r7   r   r8   r9   r:   )rr   r>   r   r   ry   r   s         r   
geminatingr     s    HK	A\\F ||Fa!!
f** EEacFllwqs|v'='=
ueUE222!2 
e##!(=(=GAJ&%gwqz1Q3??wqz)Q!*%%'!A#,%*?*?GAJ&%gwqz1Q3??u$Q
 Q3!88AaC 0E 9 9
6 11HqzU**&1#2#&6wqz&A!0'1181'> '2#2#&6u&<-'..5acl'; qSAXX+ac"2#V%6!### ## 
6 11H6#81#EEKKQZ5((
6 11H6#81#EEKK 
6 11H6#81#EEK
 QUUqsV||!
(B(B
u1%1F!F!F
6))H601==KFAA
"H71:%KFA f**@ k""r    c                    t          |           t          |          k    r^t          |                     d          t          |           d                    d |D                       t          |                     dS d}d}t	          | |          \  } }t          |           t          |          k    rdS t          |          dk    rg|                    t                    rMt          | z   } t          |          dk    r&|d         t          t          j
        fv rt          |z   }n
t          |z   }t          |           t          |          k    rdS | }|}t          | |          \  } }t          |           t          |          k    rt          d	t          |           |                     d                     t          d
t          |          t          |                     t          t          |          |                    d                     dS t          | |          } t          |           }||k     rX||         t          j        v r%|| |         t          j        ||                  z   z  }n|| |         ||         z   z  }|dz  }||k     Xt          j        D ]\  }}|                    ||          }|S )aS   join the harakat and the letters to the give word
     in the standard script, 
    it return one strings ( the word with harakat and the harakat).

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: vocalized word.
    @rtype: unicode.
    utf8r5   c                 6    g | ]}t          j        |          S r   )rV   name).0xs     r   
<listcomp>zstandard2.<locals>.<listcomp>  s!    =]=]=]PQejmm=]=]=]r    r6   r0   r   r#   r   z
len word: zlen harakat: )ro   printencoderL   r   rm   r   r
   r   r7   r9   r   
homogenizereprtahmeez2WRITTEN_HARAKASTANDARD_REPLACEMENTrp   )	rr   r>   r   ry   word_beforeharakat_beforelen_word_nmpatr   s	            r   	standard2r     s    7||s7||##gnnV$$S\\499=]=]U\=]=]=]3^3^`cdk`l`lmmmt%gw77w<<3w<<''3 w<<1!3!3E!:!:7lG7||q  WQZF-.&/ &/-- w<<3w<<''3 %gw77w<<3w<<'',GgnnV.D.DFFF/3w<<g???$~&&(:(:6(B(BCCC37G,,'ll+oo qzV222
6#8#DDD
71:--FA +oo 1 ' '
c||S#&& Kr    c                 ,   t          |           t          |          k    rdS t          | vr| S d}|D ]3}|t          j        k    s|t          j        k    rt          j        }||z  }4|}d}t          t          |                     D ]}| |         t          k    r| |         t          k    r|| |         z  }1|dk    r*||         }t          j        	                    ||          }n&||dz
           }||         }|dz   t          |           k     rV|t          j
        k    s|t          k    r3| |dz            t          k    r||dz            t          k    r||dz            }|t          j
        k    rt          }|t          j
        k    rt          }|t          j        v r|t          j        |         v rt          j        |         |         }|t          t          j        t          j        t          j        fv rk|t          k    r&| |dz
           t"          j        k    rt"          j        }n| |dz
           t"          j        k    r!|t*          t          j        fvrt"          j        }n| |         }n|t          j
        k    rt          }|t          j
        k    rt          }|t          j        v rb|t          j        |         v rN| |dz
           t"          j        fv r|t          t.          fv rt"          j        }n!t          j        |         |         }n| |         }||z  }|S )u   Transform hamza on the standard script. 
    in entry the word without harakat and the harakat seperately
    return the word with non uniform hamza.
    إعلال و إبدال الهمزة.
    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: (letters, harakat) after treatment.
    @rtype: tuple of unicode.
    r0   r   r   )ro   r	   r7   r;   r<   r8   ranger   INITIAL_TAHMEEZ_TABLEgetr   r   r   r   MIDDLE_TAHMEEZ_TABLEr:   r9   rV   r   r   r   r   FINAL_TAHMEEZ_TABLEr   )	rr   r>   ha2rA   r   ry   actualswapbefores	            r   r   r   $  s    7||s7||##s
w

 	 	Cf,,,V+++(3JCCs7||$$ 3	 3	AqzU""wqz5E'E'E
"66$QZF!7;;FFKKDD$QqS\F$QZFsS\\))!V%:::foo&qs|v55'!A#,%:O:O)01!V%:::%*F!V%:::%*F"f&AAA!<V!DDD#)#>v#Fv#ND%5&2CVEWY_Yj*kkk#)U??wqs|uy7P7P+0?DD &-QqS\UY%>%>6SXZ`ZkRlClCl+0;D#*1:DD "V%:::%*F!V%:::%*F"f&@@@6#=f#EEE&qs|~==&UTYNBZBZ&+ktt'-'A&'I&'Q#*1:DKr    c                 @   |                     t                    dk     r|S t          |           }t          |          }||k    r|S d}t          |          D ]}|dz   |k     r||dz            t          k    r||         t          j        k    rI|dz   |k     r$| |dz            t          k    r|t          j        z  }b|dk    r|t          z  }s|t          z  }~||         t          j	        v r|t          j	        ||                  z  }|||         z  }|||         z  }|S )aq   Treat the rencontre of sukun. 
    in entry the word without harakat and the harakat seperately,
     and the probably haraka
    return the new sequence of harakat

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: (letters, harakat).
    @rtype: tuple of unicode.
    r   r0   r   r#   )
r   r   ro   r   r7   r8   r   r   r   CONVERSION_TABLE)rr   r>   rz   len_harakatr   ry   s         r   treat_sukun2r   v  s4    ||EQ7||Hg,,K;x 	* 	*As[  WQqS\U%:%:1:!333 sX~~'!A#,&*@*@#v'99a#u,#u,QZ6#:::6#:71:#FFKK71:-KKwqz)r    c                 8   t          |           t          |          k    r/t          dt          |           t          |                     | |fS t          j        dt          t
          j        t          t          d|           s| |fS |d         }| d         }d}t          |           }||dz
  k     ru| |         }||         }|dz
  dk    r| |dz
           }||dz
           }	nd}d}	|dz   |k     r| |dz            }
||dz            }nd}
d}|dz   |k     r| |dz            t          k    rd}nd	}|t          k    s|t          k    r|t          k    r|t          k    r|dz  }n|t          t          t
          j        fv r&|	t          k    r|s|d
d         t
          j        z   }nw|t          v r|	t          k    r	|r|dz  }nZ|t          k    r<|r:|	t          t          fv r|d
d         t          z   }n)|t          z  }|t          z  }n|t          t
          j        fv r#|	t          k    r|s|t          z  }|t          z  }n|t          t
          j        fv r|	t          k    r|r||z  }|t          z  }n|t          k    r1|	t          k    r&|
t          k    r|s|d
d         t
          j        z   }nl|t
          j        k    r!|	t          k    r|t          z  }|t          z  }n;|t
          j        k    r$|	t          k    r|d
d         t
          j        z   }n||z  }|t          z  }n|t
          j        k    s|t          k    r|t
          j        k    r|t          k    r|dz  }n|t
          j        k    rA|t          t          t
          j        fv r&|	t          k    r|s|d
d         t
          j        z   }ng|t          k    r6|t          t          fv r&|	t          k    r|s|d
d         t
          j        z   }n&|t          t
          j        fv r!|	t          k    r|t          z  }|t          z  }n|t          k    r&|	t          k    r|s|d
d         t
          j        z   }n|t          k    r|r|d
d         t          z   }n|t
          j        k    rB|t          k    r7|r5|	t          k    r|d
d         t          z   }nb|t          z  }|t          z  }nL|t          k    r)|t
          j        k    r|d
d         t
          j        z   }n|t          k    r|t          k    r|	t          k    r|r|dz  }n|t
          j        k    r*|t
          j        k    r|s|d
d         t
          j        z   }n|t          k    r,|t          k    r!|	t          fv r|r|t          z  }|t          z  }n~|t          k    r,|t          k    r!|	t          fv r|r|t          z  }|t          z  }nG|t          k    r"|t          k    r|r|t          z  }|t          z  }n||z  }|t          z  }n
||z  }||z  }|dz  }||dz
  k     u| |         }||         }|dz
  dk    r| |dz
           }||dz
           }	nd}d}	|t          k    s|t          k    r|t          t          fv r$|	t          k    r|d
d         t
          j        z   }nm|t
          j        v r$|	t          k    r|d
d         t
          j        z   }n;|t          k    r|dz  }|dz  }n$|t          k    r<|t          t          t          fv r&|	t          k    r|t
          j        z  }|t"          z  }n|t          k    r<|t          t          t          fv r&|	t          k    r|t
          j        z  }|t          z  }n|t
          j        v r$|	t          k    r|d
d         t
          j        z   }nd|t
          j        k    r!|	t          k    r|t          z  }|t          z  }n3||z  }|t          z  }n"|t
          j        k    r|t          t          t
          j        fv r$|	t          k    r|d
d         t
          j        z   }n|t
          j        v r$|	t          k    r|d
d         t
          j        z   }n|t
          j        k    r|d
d         t
          j        z   }|dz  }nr|t          k    r|	t          k    rnZ|t          k    r|dz  }|dz  }nC|t          k    r+|	t          k    r |t
          j        z  }|t
          j        z  }n||z  }|t          z  }n|t          k    r|t          t          fv r%|	t          k    r|t
          j        z  }|t          z  }n|t          fv r%|	t          k    r|t
          j        z  }|t          z  }n|t
          j        fv r-|	t          t          fv r|d
d         t
          j        z   }|dz  }nR|t          fv r|	t          t          fv r|dz  }|dz  }n-|||         z  }|| |         z  }n|||         z  }|| |         z  }||fS )u   لإreat the jonction of WAW, YEH.
    معالجة التحولات التي تطرا على الياء أو الوا في وسط الكلمة أو في اخرها
    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: (letters, harakat)after treatment.
    @rtype: tuple of unicode.
    zHomogenize:inequal length[]r   r   r0   r#   TFNr~   )ro   r   researchr   r7   r   r   r   r   r   r   r:   r   r9   r   r   r
   r8   )rr   r>   r   r   ry   r   actual_letteractual_harakaprevious_letterprevious_harakanext_letternext_harakashadda_in_nextlast_letterlast_harakas                  r   r   r     s
    7||s7||##)3w<<WFFF!! YYllF4G4GS###  F'!! aj1:'ll+a-#AJM#AJMsaxx")!A#,")!A#,"$"$s[  %acl%acl  s[  WQqS\V%;%;!%!&--#1E1E !L00[E5I5I2%KK$uf6G'HHH E)).)"-crc"263D"DKK$..Ou4L4L$ 5M2%KK $--N-&5%.88&1#2#&6u&<#u, C $v/@'AAA!U**>*5(KOHH#v/@'AAA E))n)=0KOHH %..Ou4L4L C'''"-crc"263D"DKK#(9995((5(KOHH$):::SX@X@X"-crc"263D"DKK  =0KOHH F$777c!! F$777u$$2%KK"f&999%0A!BBB!U**N*"-crc"263D"DKK"c))}u~/M/M%..."-crc"263D"DKK %0A(BBB#u,,5(KOHH#// E))>)"-crc"263D"DKK$--.-"-crc"25"8KK#v':::%''^'&%//&1#2#&6u&<#u, S( $s**AR0R0R"-crc"2F4E"EKK#s**0F0FO_dLdLdiwLd"$KK#v':::&"333# 4"-crc"263D"DKK$++=E3I3IchbjOjOj Pk5(KOHH#s**0F0FO`e_gLgLg Mh5(KOHH#s**0F0F 1G5(KOHH
  =0KOHH},M)FAA +a-J ajajQ3!88%aclO%aclOO O O<'';#+=+=u~--Ou4L4L)#2#.v/@@&"344%%)#2#.v/@@&&r!B!S((E5%000o6N6Nv44D !S((E5%000o6N6Nv44L(&"344u$$)#2#.v/@@&"333u$$u$C{*CF///ueV->???u$$)#2#.v/@@!344%%)#2#.v/@@&"333)#2#.v/@@B&&Ou,D,D&&r!B&&?e+C+Cv44F//{*CS  u~--Ou4L4Lv44L(%))50H0Hv|+C6#4"666_QVX]P^=^=^)#2#.v/@@B((5%./P/Pr!Bwqz)GAJ&71:%K
"H+&&r    c                     t          j        |           }|                    t          t          t
          z             }t          |          dk    rdS dS )z Test if the verb is  triliteral, 
    used in selectionof verbs from the triliteral verb dictionnary
    @param verb: given verb.
    @type verb: unicode.
    @return: True if the verb is triliteral.
    @rtype: Boolean.
    r$   TF)rV   rn   rp   r   r	   r
   ro   )verbverb_nms     r   is_triliteral_verbr     sF     !$''Gooj%*55G
7||qt%r    )rt   )5__doc__
__future__r   r   libqutrub.verb_const
verb_constr7   pyarabic.arabyrV   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r*   r.   rB   rP   rR   rT   rY   r\   rg   rk   rs   r{   r   r   r   r   r   r   r   r   r   r   r   r   r    r   <module>r      s  ,       
			 % % % % % %      O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O( ( ("  B  @  :  0  2  @  4  &  >& & &$  6. . . .b :?    @  6 6 6rc# c# c#R     FX# X# X#v= = =@P P Pd, , ,`V' V' V'r    r    