
    -i9                         d Z ddlZddlZddlmZ 	 ddlZddlZn#  ddl	mZ ddl
mZ Y nxY wddlZ G d dej                  ZdS )z
    Arabic verb stemmer
    Nc                       e Zd Zd Zed             Zed             Zed             Zed             Zd Z	ddZ
g dfd	Zdd
ZddZd Zd Zed             ZdS )verb_affixerc                    t           j                             |            t          j        | _        t          j        | _        t          j        | _        t          j	        | _
        t          j        | _        t          j        | _        t          j        | _        t          j        | _        t          j        | _        t          j        | _        d S N)basic_affixer__init__SVCCOMP_PREFIX_LIST
procleticsCONJ_PREFIX_LISTprefixesCONJ_SUFFIX_LISTsuffixesCOMP_SUFFIX_LIST	encliticsVERBAL_CONJUGATION_AFFIXaffixesCOMP_PREFIX_LIST_TAGSprocletics_tagsCOMP_SUFFIX_LIST_TAGSenclitics_tagsTABLE_AFFIXtable_affixEXTERNAL_PREFIX_TABLEexternal_prefix_tableEXTERNAL_SUFFIX_TABLEexternal_suffix_table)selfs    S/var/www/html/speakWrite/venv/lib/python3.11/site-packages/alyahmor/verb_affixer.pyr   zverb_affixer.__init__!   s    #,,T333.,,-3  #8 "7?%(%>"%(%>"""    c                 $    t          | ||          S r   )check_clitic_affix)	procliticencliticaffixs      r   r"   zverb_affixer.check_clitic_affix=   s    !)Xu===r    c                 $   g }|                      t          j                  rn|                    t          j        t          j        z   | dd         z              |                    t          j        t          j        z   | dd         z              |S )z$ return modified forms of input verb   N)
startswithar
ALEF_MADDAappendALEF_HAMZA_ABOVEHAMZAALEF)verb	verb_lists     r   get_verb_variantszverb_affixer.get_verb_variants@   s     	??2=)) 	<R023FF!""X   RX/$qrr(:;;;r    c                    g }|r|                      t          j        t          j        z   t          j        z             r|                    | dd                    n|                      t          j                  r#|                    | t          j        z              nI|                      t          j                  r*|                    | dd         t          j        z              |                     t          j	                  rn|                    t          j
        t          j
        z   | dd         z              |                    t          j        t          j        z   | dd         z              |S )z$ return modified forms of input stemNr'   )endswithr)   TEHMEEMWAWr+   r.   ALEF_MAKSURAr(   r*   r,   r-   )stemr$   	list_stems      r   get_in_stem_variantsz!verb_affixer.get_in_stem_variantsJ   s>    	 	>}}RVbg-677 >  crc++++rv&& >  0000rw'' >  crcR_!<===??2=)) 	<R0 "&qrr(+ , , ,RX/$qrr(:;;;r    c                     |t           j        t           j        z   k    rW|                     t           j                  s|                     t           j                  rt           j        t           j        z   }|S )u  
        Get the enclitic variant to be joined to the word.
        For example: word  =  أرجِهِ , enclitic = هُ.
        The enclitic  is convert to HEH+ KAsra.
        اعبارة في مثل أرجه وأخاه إلى يم الزينة
        @param word: word found in dictionary.
        @type word: unicode.
        @param enclitic: first level suffix vocalized.
        @type enclitic: unicode.
        @return: variant of enclitic.
        @rtype: unicode.
        )r)   HEHDAMMAr4   KASRAYEH)wordr$   s     r   get_enclitic_variantz!verb_affixer.get_enclitic_variant[   sS     rvbh&&DMM"(,C,C&MM"&!! 'v(Hr    c           	         |r6|                     t          j        t          j        z             r
|dd         }|rC|                     t          j        t          j        z   t          j        z             r
|dd         }|r6|                     t          j                  r|dd         t          j        z   }|rj|                     t          j        t          j        z   t          j        z   t          j        z             r$|dd         t          j        z   t          j        z   }|rU|                     t          j        t          j        z   t          j        z             r|t          j        t          j        z   z  }g }| j	        
                    |i           
                    dd          D ]}| j        
                    |i           
                    dd          D ]}|                     ||          }d                    |||g          }d                    |t          j        |          |g          }d                    |d|d|g          }	|                    |||	f           |S )a  
        Join the  verb and its affixes, and get the vocalized form
        @param verb: verb found in dictionary.
        @type verb: unicode.
        @param proclitic: first level prefix.
        @type proclitic: unicode.
        @param enclitic: first level suffix.
        @type enclitic: unicode.
        @return: (vocalized word, semivocalized).
        @rtype: (unicode, unicode).
        Nr3   	vocalized -)r4   r)   r7   r.   SUKUNr8   r5   r>   r6   r   getr   rB   joinstrip_lastharakar+   )
r   r/   r#   r$   word_tuple_listproclitic_vocenclitic_vocrD   semivocalizedsegmented_words
             r   vocalizezverb_affixer.vocalizep   s7     	bfrw&677 	9D 	bfrx&7"'&ABB 	9D 	'bo66 	'9rw&D 	2bfRXo&?&IJJ 	2"I(261D 	&bfRXo&?@@ 	&BHrv%%D "155iDDHHVXYY 	S 	SM $ 3 7 7" E E I I+WY Z Z S S#88|LLGG]D,$GHH	 "B/55|D!F !F"&))]2tB,U"V"V&&	=.'QRRRRS r       فتحةc           	         g }t          j        | j        | j        | j        | j                  D ]S}|d         }|d         }|d         }|d         }|                     ||||||          }	|	r|                    |	           T|S )z generate all possible affixesr   r'         )	itertoolsproductr   r   r   r   get_formextend)
r   rA   future_type
verb_formselementprocprefsuffencnewwordlists
             r   generate_formszverb_affixer.generate_forms   s    
 
 (4=$-Y]Yghh 	/ 	/G1:D1:D1:D!*C--dD$[QQK /!!+...r    c                     g }|d         }|d         }|d         }|d         }|| j         vs|| j        vs|| j        vs	|| j        vrg S |                     ||||||          }|S )z2 generate all possible word forms by given affixesr   r'   rS   rT   )r   r   r   r   rW   )	r   rA   r   rY   rZ   r\   r]   r^   r_   s	            r   generate_by_affixesz verb_affixer.generate_by_affixes   s     
qzqzqzaj''4t}+D+DUYUbIbIbfiquqffI]]4tD#{KK
r    c                 L   g }d}d}	t           j                            ||	|          }
|                     ||          r|dz   |z   }|                     |||          r|| j        v r| j        |         D ]}|d         }|d         }|                     |||||	          }|r-|
                    ||          }|                     |||          }|rT|rRd |D             }|D ].}|	                    | 
                    |||||                     /|                    |           |S )z generate the possible affixesrE   TrF   r   r'   c                 ,    g | ]}t          |          S  )list).0xs     r   
<listcomp>z)verb_affixer.get_form.<locals>.<listcomp>   s    +J+J+JDGG+J+J+Jr    )	libqutrub	classverb	VerbClassis_valid_affixr"   r   check_clitic_tenseconjugate_tense_for_pronounrP   r+   get_tagsrX   )r   rA   r\   r]   r^   r_   rY   	list_wordnewword
transitivevbcr%   pairtensepronounok	conj_verbnewword_list
word_tuples                      r   rW   zverb_affixer.get_form   su   
 	
 !++D*[IItT** 	;HTME&&tS%88 ;D,,, $ 0 7 ; ; $Q"&q'!44T39>U U P(+(G(Gw(W(WI+/==D3+O+OL ;, ;+J+J\+J+J+JL.: c c
 * 1 1$--tSRWY`2a2a b b b b%,,\:::r    rE   c                    g }| j                             |i                               dd          }|                    |           | j                            |i                               dd          }|                    |           |                    |           |                    |           d |D             }d                    t          |                    S )z+
        Get affixes tags
        
        tagsrf   c                     g | ]}||S rf   rf   )rh   ts     r   rj   z)verb_affixer.get_tags.<locals>.<listcomp>   s    ++++1+++r    :)r   rH   rX   r   r+   rI   rg   )	r   rA   	procleticr$   rw   rx   taglistproclitic_tagsenclitic_tagss	            r   rq   zverb_affixer.get_tags   s    
 -11)R@@DDVBOO~&&& +//"==AA&LL}%%% 	uw ,+g+++xxW&&&r    c                    |s|sdS d}|sd}nY|| j         v rP|dk    rd}nG| j                            |g           D ])}|d         | j                             |d          v rd} n*d}|r\|sdS || j        v rO|dk    rdS | j                            |g           D ]'}|d         | j                            |d          v r n(dS dS dS )a  
        Verify if proaffixes (sytaxic affixes) are compatable with affixes
        (conjugation)
        @param proclitic: first level prefix.
        @type proclitic: unicode.
        @param enclitic: first level suffix.
        @type enclitic: unicode.
        @param affix: second level affix.
        @type affix: unicode.
        @return: compatible.
        @rtype: True/False.
        TFrF   r   rE   r'   )r   r   rH   r   )r   r#   r$   r%   proclitic_compatibleitems         r   r"   zverb_affixer.check_clitic_affix   sO   "  &	$ &	$4#(  5'+$$d888C<<+/(( $ 0 4 4UB ? ? 5 57d&@&D&D )2'/ '/ / /370!E/
 05,# $ $4!;;;||#t$($4$8$8$C$C ) )D#Aw$*D*H*H$,b+2 +2  2  2 !& 2 $)5#tur    c           	      <   d                     ||||t          |          g          }|s|t          j        j        v rdS |s|sdS |r|t          j        j        v rdS |r|| j                            |d          v r!|r|| j                            |d          v rdS dS )zI
        test if the given tenses are compatible with proclitics
        r   FTrE   )rI   strr	   qutrubVerbConstTablePassiveTenser   rH   r   )r   r#   r$   rw   rx   rt   comp_keys          r   ro   zverb_affixer.check_clitic_tense,  s     99%__   	es':'LLL5 	 	4
  	!4!FFF5  	4599)RHHHH I!;!?!?"!M!MMM4 5r    c                 $      fd|D             S )ad  
        Verify possible affixes in the resulted segments
        according to the given affixes list.
        @param word: the input word.
        @type word: unicode.
        @param list_seg: list of word segments indexes (numbers).
        @type list_seg: list of pairs.
        @return: list of acceped segments.
        @rtype: list of pairs.
        c                 ~    g | ]9}d                      d|d                  |d         d         g          v 7|:S )rF   Nr   r'   )rI   )rh   s
affix_listrA   s     r   rj   z-verb_affixer.verify_affix.<locals>.<listcomp>]  sY     D D Daxxeqted1Q455k233zAA AAAr    rf   )rA   list_segr   s   ` `r   verify_affixzverb_affixer.verify_affixO  s<    D D D D D8 D D D 	Dr    N)rQ   )rE   rE   )__name__
__module____qualname__r   staticmethodr"   r1   r;   rB   rP   ra   rc   rW   rq   ro   r   rf   r    r   r   r       s5       ? ? ?8 > > \>  \   \    \(' ' 'R   " 35*       B' ' ' '09 9 9t! ! !F D D \D D Dr    r   )__doc__pprintrU   pyarabic.arabyarabyr)   r   aly_stem_verb_constr	   alyahmor.basic_affixeralyahmor.aly_stem_verb_constlibqutrub.classverbrk   r   rf   r    r   <module>r      s    
           /%%%%%/222222........    ~D ~D ~D ~D ~D=. ~D ~D ~D ~D ~Ds    +