
    -iV                         d Z ddlmZ ddlmZ ddlmZ ddlmZ d Z	d Z
d	 Z G d
 d          Zedk    r ed           dS dS )zQ
stemNode represents the regrouped data resulted from the
morpholocigal analysis
    )CounterN   )WordCase)StemmedWordc                 
    | dv S )Nu$   !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~،؟ words    N/var/www/html/speakWrite/venv/lib/python3.11/site-packages/qalsadi/stemnode.pyispunctr      s    ;;;    c                 H    t          t          |           | j                  S )N)key)maxsetcount)Lists    r   most_frequent2r      s    s4yydj))))r   c                 |    t          t          |                     }|                    d          d         d         S )Nr   r   )r   sortedmost_common)r   occurence_counts     r   most_frequentr   "   s3    fTll++O&&q))!,Q//r   c                      e Zd ZdZd.dZd Zd Zd Zd Zd Zd	 Z	d
 Z
d Zd Zd Zd Zd Zd Zd Zd Zd Z	 d/dZd0dZd Zd Zd Zd Zd Zd Zd Zd Zd Zd  Zd! Z d" Z!d# Z"d$ Z#d% Z$d& Z%d' Z&d( Z'd) Z(d* Z)d+ Z*d, Z+d-S )1StemNodez]
    stemNode represents the regrouped data resulted from the
    morpholocigal analysis
    Fc                 .   || _         t          |          | _        g }|D ]O}t          |t                    r#|                    t          |                     :|                    |           P|}d| _        i | _        i | _	        g | _
        |rPd |D             | _
        t          t          | j
                            | _
        | j
                                         g | _        |rPd |D             | _        t          t          | j                            | _        | j                                         g | _        |rPd |D             | _        t          t          | j                            | _        | j                                         g | _        |rPd |D             | _        t          t          | j                            | _        | j                                         i | _        d| _        d| _        g g g g g d| _        g g g g d| _        g g g g d| _        g | _        g | _        g g g g g g g d	| _        i | _        i | _        i | _        i | _        t          t=          t          |                              | _        |r|d
                                          | _        tC          |          D ]8\  }}|"                                | j        v r3| j        |"                                                             |           n|g| j        |"                                <   | j        d                             |#                                           |$                                rR| j        d                             |           | j        d                             |#                                           |%                                rR| j        d                             |           | j        d                             |#                                           |&                                rR| j        d                             |           | j        d                             |#                                           |'                                rR| j        d                             |           | j        d                             |#                                           |(                                r| j                            |           n| j                            |           | j        r!tS          | j        d
                   rd| _        |*                                r|+                                r"| j        d                             |           |,                                r"| j        d                             |           ,|-                                r | j        d                             |           b|+                                r"| j        d                             |           |,                                r"| j        d                             |           |-                                r"| j        d                             |           |.                                r | j        d                             |           :t          | j        d                   t          | j        d                   t          | j        d                   t          | j        d                   d| _        t          t          | j        d                             | j        d<   | j        D ]{t          t          | j                                     | j        <   | j         r"fd| j                 D             | j        <   Zfd| j                 D             | j        <   |dS )zJ
        Create the stemNode  from a list of StemmedSynword cases
         c                 6    g | ]}|                                 S r   )get_vocalized.0cases     r   
<listcomp>z%StemNode.__init__.<locals>.<listcomp>K   s$    JJJt1133JJJr   c                 f    g | ].}|                                 d z   |                                z   /S ):)get_tagsget_typer    s     r   r#   z%StemNode.__init__.<locals>.<listcomp>Q   s2    WWWT3.@WWWr   c                 6    g | ]}|                                 S r   )	get_affixr    s     r   r#   z%StemNode.__init__.<locals>.<listcomp>X   s"    CCCDNN,,CCCr   c                 6    g | ]}|                                 S r   )get_rootr    s     r   r#   z%StemNode.__init__.<locals>.<listcomp>_   s     @@@d$--//@@@r   F)verbnounpounctstopwordall)r,   r-   r.   r/   )mansoubmarfou3majrourmajzoumtanwin_mansoubtanwin_marfou3tanwin_majrourr   r0   r,   r-   r/   r.   Tr5   r6   r7   r1   r2   r3   r4   )r,   r-   r/   r.   c                     g | ]}|S r   r   r!   ltype_keys     r   r#   z%StemNode.__init__.<locals>.<listcomp>   s    (V(V(VqX(V(V(V(Vr   c                 <    g | ]}t          j        |          S r   arabystrip_tashkeelr9   s     r   r#   z%StemNode.__init__.<locals>.<listcomp>   s9     ) ) )01h)(++) ) )r   N)/vocalized_lemmalen
case_count
isinstancer   appendr   r
   previous_nodes
next_nodes
vocalizedslistr   sorttagsaffixesroots	originalsguessed_type_tag	break_endlemmas	word_typer   breaks
non_breakssyntax_marksyn_previous	syn_nextssem_previous	sem_nextsrangechosen_indexesget_word	enumerateget_original	get_lemmais_verbis_nounis_stopword	is_pounctis_breakr   	is_tanwin
is_mansoub
is_marfou3
is_majrour
is_majzoum)self	case_listr@   tmp_case_listr"   idxr;   s         @r   __init__zStemNode.__init__0   s   
  /i..  	+ 	+D$)) +$$[%6%67777$$T****!	 	  	#JJ	JJJDO"3t#7#788DOO  """	 	WWYWWWDIS^^,,DIINN  	 CCCCCDLDL 1 122DLL 
 	@@i@@@DJc$*oo..DJJOO  !# 
 
 	
 
 	
 

    
 
  #5Y#8#899
  	0!!--//DI"9-- 0	< 0	<IC   ""dn44t00223::3???? 7t00223
 K%%dnn&6&6777||~~ =v&--c222F#**4>>+;+;<<<||~~ =v&--c222F#**4>>+;+;<<<!! Az*11#666J'..t~~/?/?@@@~~ ?x(//444H%,,T^^-=-=>>>}} ,""3''''&&s+++y &WTYq\22 &!%~~ <??$$ C$%56==cBBBB__&& C$%56==cBBBB__&& C$%56==cBBB??$$ <$Y/66s;;;;__&& <$Y/66s;;;;__&& <$Y/66s;;;;__&& <$Y/66s;;; v.//v.//DN:677$.233
 

 "#dk%&8"9"9::E 	 	H$(T[-B)C)C$D$DDK!# (V(V(V(VDK4I(V(V(VH%%) ) ) )59[5J) ) )H%%	 	r   c                     || _         dS )zz
        Set the case count.
        @param count: the number of stemmed word  cases
        @tyep count: integer
        NrB   ri   r   s     r   set_case_countzStemNode.set_case_count   s      r   c                     | j         S )zu
        get the case count.
        @return: the number of stemmed word  cases
        @tyep count: integer
        ro   ri   s    r   get_case_countzStemNode.get_case_count   s     r   c                     || _         dS )zk
        Set the guessed type tag.
        @param tag: guessed type tag
        @type tag: unicode
        NrN   )ri   tags     r   set_guessed_type_tagzStemNode.set_guessed_type_tag  s     !$r   c                     | j         S )ze
        get the guessed type tag.
        @return: guessed type tag
        @rtype: unicode
        rv   rs   s    r   get_guessed_type_tagzStemNode.get_guessed_type_tag       $$r   c                     | j         S )zi
        get the guessed type tag.
        @return: guessed type tag
        @rtype tag: unicode
        rv   rs   s    r   rt   zStemNode.get_case_count  r{   r   c                     || j         d<   dS )z
        Set the verb count.
        @param count: the number of stemmed word cases as  verbs
        @tyep count: integer
        r,   Nr   rp   s     r   set_verb_countzStemNode.set_verb_count        #
6r   c                     | j         d         S )z}
        get the verb count.
        @return: the number of stemmed word cases as verbs
        @tyep count: integer
        r,   r~   rs   s    r   get_verb_countzStemNode.get_verb_count(       z&!!r   c                     || j         d<   dS )z
        Set the noun count.
        @param count: the number of stemmed word cases as  nouns
        @tyep count: integer
        r-   Nr~   rp   s     r   set_noun_countzStemNode.set_noun_count0  r   r   c                     | j         d         S )z}
        get the noun count.
        @return: the number of stemmed word cases as nouns
        @tyep count: integer
        r-   r~   rs   s    r   get_noun_countzStemNode.get_noun_count8  r   r   c                     || j         d<   dS )z
        Set the stopword count.
        @param count: the number of stemmed word cases as  stopwords
        @tyep count: integer
        r/   Nr~   rp   s     r   set_stopword_countzStemNode.set_stopword_count@  s     "'
:r   c                     | j         d         S )z
        get the stopword count.
        @return: the number of stemmed word cases as stopwords
        @tyep count: integer
        r/   r~   rs   s    r   get_stopword_countzStemNode.get_stopword_countH  s     z*%%r   c                     | j         S )zr
        Get the input word given by user
        @return: the given word.
        @rtype: unicode string
        r	   rs   s    r   r[   zStemNode.get_wordP       yr   c                     || _         dS )z
        Set the input word given by user
        @param newword: the new given word.
        @type newword: unicode string
        Nr	   )ri   newwords     r   set_wordzStemNode.set_wordZ  s     			r   c                 *    t          | j                  S )v
        Get the root forms of the input word
        @return: the given root.
        @rtype: unicode string
        )rH   rL   rs   s    r   	get_rootszStemNode.get_rootsb  s     DJr   c                 .    | j         r| j         d         S dS )r   r   r   )rL   rs   s    r   r+   zStemNode.get_rootl  s     : 	!:a= rr   c                 N    t          | j                                                  S )z~
        Get the original forms of the input word
        @return: the given original.
        @rtype: unicode string
        )rH   rM   keysrs   s    r   r]   zStemNode.get_originalx  s      DN''))***r   r   c                     | j                             || j                             dg                     }| j        sd |D             }t          t	          |                    }|S )zy
        Get all lemmas of the input word
        @return: the given lemmas list.
        @rtype: unicode string
        r0   c                 6    g | ]}t          j        |          S r   r=   )r!   r:   s     r   r#   z'StemNode.get_lemmas.<locals>.<listcomp>  s#    >>>!e*1-->>>r   )rP   getr@   rH   r   )ri   wordtyperP   s      r   
get_lemmaszStemNode.get_lemmas  s_     $+//%*C*CDD# 	?>>v>>>Fc&kk""r   c                 |   d}d}| j                             dg           rt          | j         d                   }d}nyg d}|r5|                                }|dv rd}n|dv rd}n|dv rd}n	|d	v rd
}nd}|g}|D ];}| j                             |g           rt          | j         |                   }|} n<|s|S ||fS )z
        Get a lemma of the input word, you can select a POS tag (n,v,s)
        @return: the given lemmas list.
        @rtype: unicode string
        r   r.   )r/   r-   r,   r0   )s
stop_words	stop_wordr/   )nr-   )ppunct)vr,   r0   )rP   r   r   lower)ri   pos
return_poslemma
lemma_typeword_type_strategyrQ   s          r   r^   zStemNode.get_lemma  s+    
;??8R(( 	!$+h"788E!JJ "E!D!D iikk:::$CCF]] CC    #CCF]] CCC &" 0  	;??9b11 )$+i*@AAE!*JE  	'L:&&r   c                 *    t          | j                  S )v
        Get the affixes of the input word
        @return: the given affixes.
        @rtype: unicode string
        )rH   rK   rs   s    r   get_affixeszStemNode.get_affixes  s     DL!!!r   c                 .    | j         r| j         d         S dS )r   r   N)rK   rs   s    r   r)   zStemNode.get_affix  s$     < 	#<?"	# 	#r   c                     | j         S )z
        Get the vocalized forms of the input word
        @return: the given vocalizeds.
        @rtype: list of unicode string
        )rG   rs   s    r   get_vocalizedszStemNode.get_vocalizeds  s     r   c                     | j         S )zw
        Get the tags of the input word
        @return: the tags list.
        @rtype: list of unicode string
        )rJ   rs   s    r   r&   zStemNode.get_tags  r   r   c                     | j         S )
        Get the chosen_indexes forms of the input word
        @return: the given chosen_indexes.
        @rtype: unicode string
        )rZ   rs   s    r   get_chosen_indexeszStemNode.get_chosen_indexes  s     ""r   c                 F    |D ]}|| j         k    s|dk     r dS || _        dS )r   r   N)rB   rZ   )ri   indexesis      r   set_chosen_indexeszStemNode.set_chosen_indexes  sD      	* 	*ADO##q1uu (- #*Dr   c                 $    | j         d         dk    S )z
        Return if all cases are verbs.
        @return:True if the node has verb in one case at least.
        @rtype:boolean
        r,   r   r~   rs   s    r   has_verbzStemNode.has_verb       z&!A%%r   c                 $    | j         d         dk    S )z
        Return if all cases are nouns.
        @return:True if the node has noun in one case at least.
        @rtype:boolean
        r-   r   r~   rs   s    r   has_nounzStemNode.has_noun  r   r   c                 $    | j         d         dk    S )z
        Return if all cases are stopwords.
        @return:True if the node has stopword in one case at least.
        @rtype:boolean
        r/   r   r~   rs   s    r   has_stopwordzStemNode.has_stopword  s     z*%))r   c                 $    | j         d         dk    S )z
        Return if all cases are pounctuations
        @return:True if the node has pounctation in one case at least.
        @rtype:boolean
        r.   r   r~   rs   s    r   	has_punctzStemNode.has_punct#  s     z(#a''r   c                 p    | j         d         o)| j         d          o| j         d          o| j         d          S )z
        Return if all cases are verbs.
        @return:True if the node is verb in alll cases.
        @rtype:boolean
        r,   r.   r/   r-   r~   rs   s    r   r_   zStemNode.is_verb-  sK     Jv 'Jx(('Jz**' Jv&&		
r   c                 p    | j         d          o(| j         d          o| j         d          o| j         d         S )z
        Return if all cases are nouns.
        @return:True if the node is noun in alll cases.
        @rtype:boolean
        r.   r/   r,   r-   r~   rs   s    r   r`   zStemNode.is_noun<  sK     
8$$ #Jz**#Jv&&# 
6"		
r   c                 p    | j         d          o(| j         d         o| j         d          o| j         d          S )z
        Return if all cases are stopwords.
        @return:True if the node is stopword in alll cases.
        @rtype:boolean
        r.   r/   r,   r-   r~   rs   s    r   ra   zStemNode.is_stopwordK  sK     
8$$ '
:&'Jv&&' Jv&&		
r   c                 p    | j         d         o)| j         d          o| j         d          o| j         d          S )z
        Return if all cases are pounctuations
        @return:True if the node is pounctation in alll cases.
        @rtype:boolean
        r.   r/   r,   r-   r~   rs   s    r   rb   zStemNode.is_pounctZ  sK     Jx  'Jz**'Jv&&' Jv&&		
r   c                 r    | j         d         | j         d         k    o| j         d         | j         d         k    S )z
        Return True if most  cases are verbs.
        @return:True if the node is verb in most cases.
        @rtype:boolean
        r,   r-   r/   r~   rs   s    r   is_most_verbzStemNode.is_most_verbi  s:     JvF!33 <
6"TZ
%;;	
r   c                 r    | j         d         | j         d         k    o| j         d         | j         d         k    S )z
        Return True if most  cases are nouns.
        @return:True if the node is noun in most cases.
        @rtype:boolean
        r-   r,   r/   r~   rs   s    r   is_most_nounzStemNode.is_most_nounw  s:     JvF!33 <
6"TZ
%;;	
r   c                 r    | j         d         | j         d         k    o| j         d         | j         d         k    S )z
        Return True if most cases are stopwords.
        @return:True if the node is stopword in most cases.
        @rtype:boolean
        r/   r,   r-   r~   rs   s    r   is_most_stopwordzStemNode.is_most_stopword  s:     Jz"TZ%77 <
:&F);;	
r   c                 :   |                                  rdS |                                 rdS |                                 rdS |                                 rdS |                                 rdS |                                 rdS |                                 rdS dS )	zh
        Return the word type.
        @return:the word type or mosttype.
        @rtype:string
        r-   r,   r/   r.   mostnounmostverbmoststopword	ambiguous)r`   r_   ra   rb   r   r   r   rs   s    r   get_word_typezStemNode.get_word_type  s     <<>> 	6\\^^ 	6 	:^^ 		8   	:   	:""$$ 	!>;r   c                 >   | j         r	| j        sdS |                                 r| j        s	| j        sdS | j        r	| j         sdS t          | j                  t          | j                   k    rdS t          | j                  t          | j                   k     rdS dS )z
        Return the word break type,
        if the word break the sentences or not.
        @return:the word type or mosttype.
        @rtype:string
        break	non_breakmostNon_break
most_breakr   )rR   rS   r   rW   rX   rA   rs   s    r   get_break_typezStemNode.get_break_type  s     ; 	t 	7   		): 		4> 		7_ 	T[ 	;!!C$4$444"?!!C$4$444<;r   c                     | j         S )z
        The syn node is break end like puctuation, if it  hasn't any syntaxique or semantique
        relation with the previous word
        )rO   rs   s    r   is_break_endzStemNode.is_break_end  s     ~r   c                 .    |                                  dv S )zz
        The syn node is break, if it hasn't any syntaxique or semantique
        relation with the previous word
        )r   	mostBreak)r   rs   s    r   rc   zStemNode.is_break  s     ""$$(>>>r   c           	         d| j         d         d                    | j                  |                                 |                                 | j        d         | j        d         | j        d         fz  }|t          | j                  z  }|t          | j                  z  }|t          | j                  z  }|dt          | j	                  z   z  }|S )Nz$
'%s':%s, [%s-%s]{V:%d, N:%d, S:%d} r
   z, r,   r-   r/   z
Indexes : )
__dict__joinrM   r   r   r   reprrT   rQ   rZ   )ri   texts     r   __repr__zStemNode.__repr__  s    6M&!IIdn%%  !!JvJvJz":
 
 	T%&&&T^$$$T^$$$tD$78888r   N)F)r   )r   F),__name__
__module____qualname____doc__rm   rq   rt   rx   rz   r   r   r   r   r   r   r[   r   r   r+   r]   r   r^   r   r)   r   r&   r   r   r   r   r   r   r_   r`   ra   rb   r   r   r   r   r   r   rc   r   r   r   r   r   r   *   s        
| | | |P       $ $ $% % %% % %# # #" " "# # #" " "' ' '& & &         
 
 
+ + +    /' /' /' /'b" " "	# 	# 	#    # # #* * * & & && & &* * *( ( (
 
 

 
 

 
 

 
 

 
 

 
 

 
 
  2  4  ? ? ?    r   r   __main__zStem Node module)r   collectionsr   pyarabic.arabyr>   wordcaser   stemmedwordr   r   r   r   r   r   printr   r   r   <module>r      s                        $ $ $ $ $ $< < <
* * *0 0 0|
 |
 |
 |
 |
 |
 |
 |
~ z	E
 r   