
    -i                         d Z ddlmZ ddlmZ ddlZddlm	Z
 ddlZddlZddlmZ ddlmZ ddlmZ ddlmZ  G d d	          Zd
 Zedk    r e             dS dS )z
Arabic verb stemmer
    N)arepr   )print_table)custom_dictionary)wordcasec                       e Zd ZdZddZd Zd Zd Zd Zd Z	d	 Z
d
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zej        dddfdZed             Zed             Zed             Zed             Zd Zd ZdS )VerbStemmerz
    Arabic verb stemmer
    Fc                 
   t           j                                        | _        | j                            t
          j                   | j                            t
          j                   t           j                                        | _	        | j	                            t
          j
                   | j	                            t
          j                   t          j                                        | _        d| _        i | _        i | _        || _        t'          j        d          | _        t-          j        d          | _        t
          j        | _        i | _        i | _        i | _        i | _        i | _        d| _        d S )NTverbs ) 
tashaphynestemmingArabicLightStemmercomp_stemmerset_prefix_listSVCCOMP_PREFIX_LISTset_suffix_listCOMP_SUFFIX_LISTconj_stemmerCONJ_PREFIX_LISTCONJ_SUFFIX_LISTalyahmorverb_affixer	generatorallow_syntax_lastmarkcompatibility_cacheverb_dict_cachedebugarabicdictionaryArabicDictionaryverb_dictionaryr   custom_verb_dictionaryVERB_STAMP_PATverb_stamp_patstamp_cache
verb_cacheverbclass_cachestripped_words_cacheverb_conj_cache
error_codeselfr   s     O/var/www/html/speakWrite/venv/lib/python3.11/site-packages/qalsadi/stem_verb.py__init__zVerbStemmer.__init__"   s>   &/BBDD 	))#*>???))#*>??? '/BBDD 	))#*>???))#*>??? ".;;==%)" $& !
  0@II '8&I'&R&R#!0!$&!!    c                     | j         S )z?
        Return error code when word is not recognized
        r+   r-   s    r.   get_error_codezVerbStemmer.get_error_codeO   s     r0   c                 &    | j         s	|| _         dS dS )z<
        set error code when word is not recognized
        Nr2   )r-   r+   s     r.   set_error_codezVerbStemmer.set_error_codeW   s#      	)(DOOO	) 	)r0   c                     || j         v r| j         |         S | j                            |          }|| j                            |          z  }|| j         |<   |S ))
        lookup for word in dict
        )r'   r"   lookup_by_stampr#   )r-   wordresults      r.   r9   zVerbStemmer.lookup_by_stamp^   s`     4?""?4(()99$??Fd1AA$GGGF$*DOD!r0   c                 j   | j                             |          }|                    t          j        d          }t          |          dk    rdS || j        vrA| j                             |          }|| j                            |          z  }|| j        |<   | j        	                    |d          S )r8   r      F)
r"   
word_stampreplacearTEHlenr&   exists_as_stampr#   get)r-   r:   stampr;   s       r.   rC   zVerbStemmer.exists_as_stampj   s     $//55bfb)) u::>>5((()99$??Fd1AA$GGGF&,DU###E5111r0   c                    |sdS g }|g|                      |          z   }t          t          |                    }| j        }g }|D ]}| j                            |          }|D ]}|d|d                  }	||d         |d                  }
||d         d         }|t          j        v rt          j        |         d         }|}|
g|                     |
|          z   }t          |          }|D ]}||	|||d}|
                    |            |rt          d           |r8t          t          |                     t          t          |                     g }|D ]}|d         }| j                            |          }|                     ||t          j                  }|D ]}|d         |d         z
  dk    rh|                                }|d|d                  |d	<   ||d         |d                  |d
<   ||d         d         |d<   |
                    |           |}g }|D ]s}|d         }	|d         }d                    |d	         |d         g          }|                     |	||          r'|
                    |                                           t|}g }|D ]D}|                     |d
                   r'|
                    |                                           E|rt          d           |r8t          t          |                     t          t          |                     |}g }|D ]N}|                     |d
         |d                   }|rt          d           |r8t          t          |                     t          t          |                     |                     |d
         |          }|rt          d           |r8t          t          |                     t          t          |                     |D ]t}|                                }|d         |d<   |d         |d<   |                    dd          |d<   t          |d         dv           |d<   |
                    |           uP|rt          d           |r8t          t          |                     t          t          |                     |}g }|D ]}|                     |d         |d         |d	         dz   |d         z   |d         |d         |d         |d                   }|D ]B}|                                }|                                |d<   |
                    |           C|rt          d           |rDt          t          |                     d |D             }t          t          |                     |}|D ]}|d         }|                     |d         |d         |d                   }d}|d         rd nd!} |D ]\  }!}"}#|                     ||	|          }$|
                    t7          j        i d"|d         d#|d         |d	         |d         |d         fd$|d
         dt;          j        |                    dd                    d%|d         d&|d         d|!d'|"d(|$d)|d*|d+                             d*d          d,|d+                             d,d          d-|d+                             d-d          d.|d/                             d0d          d1|d/                             d1d          d2|d/                             d2d          d3|d/                             d3d          |d         |d0         |d4         d5| dd6                     |S )7z
        Stemming verb
        @param verb_in: given verb
        @type verb_in: unicode
        @return : stemmed words
        @rtype:
        Nr   r   first)verbproenc	stem_comp
trans_compzafter first levelrK      prefix	stem_conjsuffixrI   rJ   -zafter second levelrL   zinfinitive candidat verbszvalid infinitive candidat verbsrH   infharakarootr   
transitiveyr   zafter lookup dictconjzafter generating conjugationc                     g | ]
}|d          S )rX    ).0items     r.   
<listcomp>z-VerbStemmer.stemming_verb.<locals>.<listcomp>2  s    777dT&\777r0   	vocalizedVerbrW   nr:   affixstemoriginallemmasemivocalizedtagstypenumberpronoun_tagsgenderpersontense2
tense_tagstensevoicemood	confirmedpronounfreqverb)rU   rn   rr   freqoriginaltagssyntax)get_verb_variantslistsetr   r   segmentr   TABLE_DOUBLE_TRANSITIVE_SUFFIXget_in_stem_variantsboolappendprintr   r   r   verify_affixVERBAL_CONJUGATION_AFFIXcopyjoin _VerbStemmer__check_clitic_affixrC   )_VerbStemmer__get_infinitive_verb_by_stem%_VerbStemmer__verify_infinitive_verbsrD   &_VerbStemmer__generate_possible_conjugvocalizeprepare_tagsr   WordCaser@   normalize_hamza)%r-   verb_indetailed_result	verb_listr   word_segmented_listrH   list_seg_compseg	procliticrb   encliticfirstsuffix	list_stemtransitive_compstmword_segtmp_listverb2list_seg_conjseg_conjword_seg_l2
affix_conjinfverb_dictr\   word_seg_l3one_correct_conjrX   word_seg_l4conjsvocal_tuple_listtag_typeoriginal_tagsr^   re   __rf   s%                                        r.   stemming_verbzVerbStemmer.stemming_verbz   sQ	     	4
""7++,	 Y((	
  	9 	9D -55d;;M$ 9 9 3q6N	CFSVO,A> sAAA"%"DX"Nw"WK*H!FT%>%>tX%N%NN	"&x..$ 9 9C $('%(&5   H (..x888899.  	'%&&& 	4 %..!!!+122333+ 	1 	1H[)E !-55e<<M !--}c&B M
 * 1 1QK(1+-!33 #+--//K,1-HQK-,@K)/4Xa[8A;5N/OK,,1(1+--,@K)OOK0001 '+ 	1 	1H IH8H#5x7I"JKKJ((HjII 1000 '+ 	1 	1H##H[$9:: 1000 	(&''' 	)%..!!!+h''((( '+ '	- '	-H  ==%x'= L  31222 1eGnn%%%k,//000
  99%| L  97888 1eGnn%%%k,//000$ - -&mmoo%)&\E"(,XH%&*hhvr&:&:F#,0l1Cx1O,P,PL),,,,-  	'%&&& 	)%..!!!+h''((( '+ 	- 	-H  $>>%"S(8H+=="&    ) - -&mmoo&*iikkF#,,,,-  	20111 	&%..!!!77h777E+e$$%%% ' , /	 /	HF#D#}}[!8E?HUO    H#'#5>CC3M0@ & &,	=" ((y(CC&&%"HV$4# ( ( 2 ( 2 (	& #H[$9 #B$6x||FB7O7O$P$P 'V $T&\ ( ,] #D #H  %d>&:&>&>x&L&L!" %d>&:&>&>x&L&L#$ %d>&:&>&>x&L&L%& %d<&8&<&<Wb&I&I'( $T,%7%;%;GR%H%H)* #D$6$:$:62$F$F+, (l);)?)?R)P)P-. +/|*<%)']'+I$.,9&(9   ! ! ! !&P r0   c                    |d         |d         g}|                     t          j                            |i                               dg                      |                     t          j                            |i                               dg                      d |D             }d                    |          }|S )zprepare tags to be displayedrn   rr   rf   c                     g | ]}||S rZ   rZ   )r[   ts     r.   r]   z,VerbStemmer.prepare_tags.<locals>.<listcomp>t  s    %%%a1%%%%r0   :)extendr   COMP_PREFIX_LIST_TAGSrD   COMP_SUFFIX_LIST_TAGSr   )r-   rX   r   r   rf   s        r.   r   zVerbStemmer.prepare_tagsl  s     WtI/C-11)R@@DDVRPPQQQC-11(B??CCFBOOPPP &%4%%%xx~~r0   c           	         d                     |t          |          g          }|| j        v r| j        |         S g }|                     |          }t	          |          r>|D ];}|                    |d         |d         |d         |d         |d         d           <|}g }|D ]#}|d         dv s|s|                    |           $|| j        |<   |S )	a0  
        Get the infinitive verb form by given stem, and transitivity
        @param verb: the given verb
        @type verb; unicode
        @param transitive: tranitive or intransitive
        @type transitive: boolean
        @return : list of infinitive verbs
        @rtype: list of unicode
        r   r^   rU   future_typestampedrT   )rH   rU   rS   rE   rT   rV   )r   strr   r9   rB   r~   )	r-   rH   rU   verb_keylisteverb_id_list
verb_tuple	listetempr\   s	            r.   __get_infinitive_verb_by_stemz)VerbStemmer.__get_infinitive_verb_by_stemx  s    88T3z??344t+++'11 ++D11| 	* 
 

 *; 7&0&>",]";!+I!6 *6 2     	 	# 	#DL!X--Z-T""").X&r0   c                     || _         dS )z
        Set the debug attribute to allow printing internal analysis results.
        @param debug: the debug value.
        @type debug: True/False.
        N)r   r,   s     r.   	set_debugzVerbStemmer.set_debug  s     


r0   c                     d| _         dS )zX
        Enable the syntaxic last mark attribute to allow use of I'rab harakat.
        TNr   r3   s    r.   enable_syntax_lastmarkz"VerbStemmer.enable_syntax_lastmark  s     &*"""r0   c                     d| _         dS )zY
        Disable the syntaxic last mark attribute to allow use of I'rab harakat.
        FNr   r3   s    r.   disable_syntax_lastmarkz#VerbStemmer.disable_syntax_lastmark  s     &+"""r0   c                     g }|                      |          }|D ]#}|d         |k    r|                    |           $|S )a  
        verify if given infinitive verbs are compatible with stem_conj
        @param stem_conj: the stemmed verbs without conjugation affixes.
        @type stem_conj: unicode.
        @param infverb_dict: list of given infinitive verbs,
        each item contain 'verb' and 'type'.
        @type infverb_dict: list of dicts.
        @return: filtred  infinitive verbs
        @rtype: list of dict
        rE   )
verb_stampr~   )r-   rO   r   tmp
stem_stampr\   s         r.   __verify_infinitive_verbsz%VerbStemmer.__verify_infinitive_verbs  sR     __Y//
  	! 	!D G}
**

4   
r0   c                    t          j        |          }t          j        |          }|                    t           j                  r
|dd         }|dd         |dd         k    r
|dd         }| j                            d|          S )a>  
        generate a stamp for a verb,
        the verb stamp is different of word stamp, by hamza noralization
        remove all letters which can change form in the word :
            - ALEF,
            - YEH,
            - WAW,
            - ALEF_MAKSURA
            - SHADDA
        @return: stamped word
        r   Nr   )r@   strip_tashkeelr   
startswithHAMZAr%   subr-   r:   s     r.   r   zVerbStemmer.verb_stamp  s      &&!$''??28$$ 	8D9RU##9D"&&r4000r0   c                    d                     |||g          }|| j        v r| j        |         S |s|sdS d}|sd}nh|t          j        v rZ|dk    rd}nQt          j                            |g           D ].}|d         t          j                            |d          v rd} n/d}|r|sd| j        |<   dS |t          j        v rw|dk    rd| j        |<   dS t          j                            |g           D ],}|d         t          j                            |d          v r n-d| j        |<   dS d| j        |<   dS d| j        |<   dS )a  
        Verify if proaffixes (sytaxic affixes) are compatable with affixes
        (conjugation)
        @param proclitic: first level prefix.
        @type proclitic: unicode.
        @param enclitic: first level suffix.
        @type enclitic: unicode.
        @param affix: second level affix.
        @type affix: unicode.
        @return: compatible.
        @rtype: True/False.
        r   TFrQ   r   r   r   )r   r   r   EXTERNAL_PREFIX_TABLETABLE_AFFIXrD   EXTERNAL_SUFFIX_TABLE)r-   r   r   ra   comp_keyproclitic_compatibler\   s          r.   __check_clitic_affixz VerbStemmer.__check_clitic_affix  s    88Y%899t///+H55 %	$ %	$4#(  5'+$$c777C<<+/(( # 3 3E2 > > 5 57c&?&C&CIr&R&RRR370!E S 05,# $ $9=D,X64!:::||=A0:#t$'O$7$7r$B$B ) )D#Aw#*C*G*GRT*U*UUU %  V BGD4X>#(5=A0:#t-2 *ur0   c           	         d                     ||||t          |          g          }|| j        v r| j        |         S |s|t          j        j        v rd| j        |<   dS |s|sd| j        |<   dS |r|t          j        j        v rd| j        |<   dS |r"|t          j                            |d          v r0|r"|t          j                            |d          v rd| j        |<   dS d| j        |<   dS )zI
        test if the given tenses are compatible with proclitics
        r   FTr   )	r   r   r   r   qutrubVerbConstTablePassiveTenser   rD   r   )r-   r   r   rn   rr   rU   r   s          r.   __check_clitic_tensez VerbStemmer.__check_clitic_tense  s/   
 88Y%#j//RSSt///+H55 	es':'LLL16D$X.5 	 	15D$X.4
  	!4!FFF16D$X.5 	"c&?&C&CIr&R&RRR S&#*C*G*GRT*U*UUU15D$X.4 27D$X.5r0   c                     d                     ||t          |          g          }|| j        v r| j        |         S t          j                            |||          }|| j        |<   |S )zG
        return the verb class, used to reduce verb class init
        rQ   )r   r   r(   	libqutrub	classverb	VerbClass)r-   infinitive_verbrU   r   keyvbcs         r.   _get_verbclasszVerbStemmer._get_verbclassD  sq     hhc*ooFGG$&&&',,%//[ C ),D %Jr0   c                     |j                             ||          }|s|                    ||          }|                     |          }||fS )z4
        return the conjugation from a verb
        )conj_displayget_conjconjugate_tense_for_pronoun_strip_tashkeel)r-   verb_objectrn   rr   rX   conj_nms         r.   _get_conjugationzVerbStemmer._get_conjugationR  sW     '00@@ 	K::5'JJD&&t,,W}r0   rQ   Tc                    g }|dk    s|dk    s|dk    rt                      S |                     |||          }	|                    t          j                  r
|dd         }g }
|t
          j        v r0t
          j        |         D ]}|
                    |d                    t          t          |
                    }
|t
          j        v rt
          j        |         D ]}|d         }|d         }| 	                    |||||          }|rd| 
                    |	||          \  }}||k    rD|                    ||||	                    |          |	                    |          |||d           |S )zL
        generate possible conjugation for given verb to be stemmed
        r   r   Nr   )rH   rn   rr   ri   rm   r^   unvocalizedrU   )ry   r   r   r@   ALEFr   r   r~   rx    _VerbStemmer__check_clitic_tenser   get_pronoun_featuresget_tense_features)r-   r   unstemed_verbra   r   extern_prefixextern_suffixrU   list_correct_conjr   tensespairrn   rr   testconj_vocalizedr   s                    r.   __generate_possible_conjugz&VerbStemmer.__generate_possible_conjug_  s    b  MR$7$75B;;55L !!/:{KK BG$$ 	!""IECO##. ' 'd1g&&&&c&kk""CO##.  Qq'00!=%* 
  .2.C.CCPW.X.X+NG
 -//)00(7).+2030H0H0Q0Q.1.D.DU.K.K-;/6.8	 	   ! r0   c                 $      fd|D             S )ad  
        Verify possible affixes in the resulted segments
        according to the given affixes list.
        @param word: the input word.
        @type word: unicode.
        @param list_seg: list of word segments indexes (numbers).
        @type list_seg: list of pairs.
        @return: list of acceped segments.
        @rtype: list of pairs.
        c                 ~    g | ]9}d                      d|d                  |d         d         g          v 7|:S )rQ   Nr   r   )r   )r[   s
affix_listr:   s     r.   r]   z,VerbStemmer.verify_affix.<locals>.<listcomp>  sS     
 
 
388T&AaD&\4!<,H#I#IZ#W#WA#W#W#Wr0   rZ   )r:   list_segr   s   ` `r.   r   zVerbStemmer.verify_affix  s4    
 
 
 
 

 
 
 	
r0   c                 :   g }t           j        | v r|                    |                     t           j        t           j        dz                       |                    |                     t           j        t           j        t           j        z                        |S )z#return modified forms of input verb   )r@   
ALEF_MADDAr~   r?   ALEF_HAMZA_ABOVEr   r   )rH   r   s     r.   rw   zVerbStemmer.get_verb_variants  sw     	=D  T\\"-9Lq9PQQRRRT\\"-BG9KLLMMM r0   c                    g }|r|                      t          j        t          j        z   t          j        z             r|                    | dd                    n|                      t          j                  r#|                    | t          j        z              nI|                      t          j                  r*|                    | dd         t          j        z              t          j        | v r|                    | 	                    t          j        t          j
        dz                       |                    | 	                    t          j        t          j        t          j        z                        |                    | 	                    t          j        t          j
        t          j        z                        |S )z#return modified forms of input stemNr   r   )endswithr@   rA   MEEMWAWr~   r   ALEF_MAKSURAr   r?   r   r   )rb   r   r   s      r.   r|   z VerbStemmer.get_in_stem_variants  s\    	 	>}}RVbg-677 >  crc++++rv&& >  0000rw'' >  crcR_!<=== =D  T\\"-9Lq9PQQRRRT\\"-BG9KLLMMMT\\"-9Lrw9VWWXXXr0   c                     |t           j        t           j        z   k    rW|                     t           j                  s|                     t           j                  rt           j        t           j        z   }|S )u  
        Get the enclitic variant to be joined to the word.
        For example: word  =  أرجِهِ , enclitic = هُ.
        The enclitic  is convert to HEH+ KAsra.
        اعبارة في مثل أرجه وأخاه إلى يم الزينة
        @param word: word found in dictionary.
        @type word: unicode.
        @param enclitic: first level suffix vocalized.
        @type enclitic: unicode.
        @return: variant of enclitic.
        @rtype: unicode.
        )r@   HEHDAMMAr  KASRAYEH)r:   r   s     r.   get_enclitic_variantz VerbStemmer.get_enclitic_variant  sY     rv(((MM"(## )'+}}RV'<'< ) v(Hr0   c                 :    | j                             |||          S )a  
        Join the  verb and its affixes, and get the vocalized form
        @param verb: verb found in dictionary.
        @type verb: unicode.
        @param proclitic: first level prefix.
        @type proclitic: unicode.
        @param enclitic: first level suffix.
        @type enclitic: unicode.
        @return: (vocalized word, semivocalized).
        @rtype: (unicode, unicode).
        )r   r   r   r   r  r   r  r@   r  r   r  r   strip_lastharaka)r-   rH   r   r   enclitic_vocproclitic_vocr^   re   s           r.   r   zVerbStemmer.vocalize  s     ~&&tYAAAr0   c                 f    || j         vrt          j        |          | j         |<   | j         |         S )z;
        reduce amount of calculate strip tashkeel
        )r)   r@   r   r   s     r.   r   zVerbStemmer._strip_tashkeel  s8     t000.0.?.E.ED%d+(..r0   N)F)__name__
__module____qualname____doc__r/   r4   r6   r9   rC   r   r   r   r   r   r   r   r   r   r   r   r   r@   FATHAr   staticmethodr   rw   r|   r  r   r   rZ   r0   r.   r	   r	      s        + + + +Z  ) ) )
 
 
2 2 2 p p pd
 
 
* * *X  * * *+ + +  *1 1 1.8 8 8t$ $ $L    $ H=! =! =! =!~ 
 
 \
" 	 	 \	   \*   \(* * *</ / / / /r0   r	   c                  *   g d} t                      }|                    d           | D ]B}|j                            |           t	          |j                                                   C| D ]}|                    |          }|D ]}|j                                        D ]Q}t	          d	                    |t          |j        |                   g                              d                     Rt	                       t	                       dS )z
    Test main)u
   يضربهu
   يضربكu
   استقلu   ويستخدمونهاu   اتركنيT	utf8N)r	   r   r   rz   r   get_affix_listr   __dict__keysr   unicodeencode)wordlistverbstemmerr:   r;   analyzedr   s         r.   mainlyr"    s.     H --K$ 9 9 ((...k&55778888  **400 	 	H  (--// X Xdiigh.?.D&E&E FGGNNvVVWWWWGGGGGGG	 r0   __main__)r  pyarabic.arabyarabyr@   pyarabic.arabreprr   tashaphyne.stemmingr   alyahmor.aly_stem_verb_constaly_stem_verb_constr   alyahmor.verb_affixerr   libqutrub.classverbr   arramooz.arabicdictionaryr    print_debugr   r   r   r   r	   r"  r  rZ   r0   r.   <module>r.     s          # # # # # #     * * * * * *         4 4 4 4 4 4 $ $ $ $ $ $            n/ n/ n/ n/ n/ n/ n/ n/b  < z
FHHHHH r0   