
    -iV                     R    d Z ddlZddlmZ ddlZd Zd Zd Z	 G d d          Z
dS )z
Arabic noun stemmer
    Nc                 $      fd|D             S )a@  
    Verify possible affixes in the resulted segments according
    to the given affixes list.
    @param word: the input word.
    @type word: unicode.
    @param list_seg: list of word segments indexes (numbers).
    @type list_seg: list of pairs.
    @return: list of acceped segments.
    @rtype: list of pairs.
    c                 ~    g | ]9}d                      d|d                  |d         d         g          v 7|:S -Nr      join.0s
affix_listwords     R/var/www/html/speakWrite/venv/lib/python3.11/site-packages/qalsadi/noun_affixer.py
<listcomp>z verify_affix.<locals>.<listcomp>!   sL    XXX!388T&AaD&\4!<,H#I#IZ#W#WA#W#W#W     r   list_segr   s   ` `r   verify_affixr      s$     YXXXXxXXXXr   c                 `   |}d|v r
| d         rdS | d         dv rd|v rdS d|v rdS d|v rdS |d	v r
| d
         sdS |dv r
| d         sdS d|vr
| d         sdS |                     d          r
| d         sdS |t          j        k    r
| d         sdS |t          j        k    r| d         dk    rdS dS )aF  
    Test if the given word from dictionary is compabilbe with affixes tags.
    @param noun_tuple: the input word attributes given from dictionary.
    @type noun_tuple: dict.
    @param affix_tags: a list of tags given by affixes.
    @type affix_tags:list.
    @param proclitic_nm: first level prefix vocalized.
    @type proclitic_nm: unicode.
    @param enclitic_nm: first level suffix vocalized.
    @type enclitic_nm: unicode.
    @param suffix_nm: first level suffix vocalized.
    @type suffix_nm: unicode.
    @return: if the tags are compatible.
    @rtype: Boolean.
    
   تنوينmamnou3_sarfFnumber)   جمعu   جمع تكسيرu   جمع مؤنث سالمu   جمع مذكر سالمu   مثنى)u   همu   هنu   كماu   كمu   هما	hm_suffix)u   هu   ها	ha_suffix   الk_prefix
kal_prefixw_suffixr   T)endswitharWAWTEH_MARBUTA)
noun_tuple
affix_tagsproclitic_nmenclitic_nm	suffix_nm	proclitics         r   validate_tagsr+   $   s#   " I z!!j&@!u(>>>%335%335##5BBB:VaKbBun$$Z-D$uYz*'=u&!! *\*B u BF:j#9uBN""z(';x'G'Gu 4r   c                 h   t           j        |          d         }t           j        |         d         }t           j        |         d         }|t           j        |                             dd          z  }d|v r
d|v rd|vrdS d|v rd|v rdS d|v rd|v rdS d|v rd|v rdS d|v rd	|v rdS d
|v rd|vrdS dS )ag  
    Verify if proaffixes (sytaxic affixes) are compatable
    with affixes ( conjugation)
    @param proclitic_nm: first level prefix.
    @type proclitic_nm: unicode.
    @param enclitic: first level suffix.
    @type enclitic: unicode.
    @param suffix: second level suffix.
    @type suffix: unicode.
    @return: compatible.
    @rtype: True/False.
    tagscasesr   
   تعريفu   مضافFr   u
   إضافةu   لايضافu   جرu
   مجرورT)SNCCOMP_PREFIX_LIST_TAGSCOMP_SUFFIX_LIST_TAGSCONJ_SUFFIX_LIST_TAGSget)r'   encliticsuffixproclitic_tagsenclitic_tagssuffix_tagss         r   check_clitic_affixr:   [   s   N .|<VDN-h7?M+F3F;K 3,V488"EEEK&&+%%m++u		'	'LK,G,Gu		'	'LK,G,Gu
 
}	$	$)D)Du	}	$	$;)F)Fu	>	!	!l+&E&Eu tr   c                       e Zd Zd Zed             Zedd            Zed             Zed             Zd Z	ed             Z
ed	             Zed
             ZdS )muwaledc                     d S )Nr   )selfs    r   __initzmuwaled.__init   s	     	r   c                    t          | g          }|t          j        t          j        z   t          j        t          j        z   t          j        t          j        t          j        z   t          j        z   fv r$| t          j        z   }|                    |           |r4|t          j        t          j        z   t          j        t          j        z   fv r$| t          j        z   }|                    |           | 	                    t          j                  r,| dd         t          j
        z   }|                    |           | 	                    t          j                  r,| dd         t          j        z   }|                    |           |}|S )u  
        Generate the Noun stem variants according to the affixes.
        For example مدرستي = >مدرست+ي = > مدرسة +ي.
        Return a list of possible cases.
        @param stem: the input stem.
        @type stem: unicode.
        @param suffix_nm: suffix (no mark).
        @type suffix_nm: unicode.
        @return: list of stem variants.
        @rtype: list of unicode.
        N)setr"   ALEFTEHYEHr$   addNOONr#   r!   ALEF_MAKSURAHAMZA	YEH_HAMZA)stemr)   possible_noun_listpossible_nounvalidated_lists        r   get_stem_variantszmuwaled.get_stem_variants   sS   " !
 

 GbfFR^#FFRWrv%	
 
 
 !2>1M""=111 	2I"&27*:BFRW<L)MMM 26MM""=111==   	2 "I7M""=111=="" 	2 "I4M""=111 ,r   Fc                    |}|}|                     t          j                  dk    r-|r+t          j        t          j        t          j        |          }nI|sGt          j        |          r3| dd         t          j        t          j        fv rd}n|rt          j	        }dt          j        |         d         v rt          j        |          }n|}||fS )u  
        Get the suffix variant to be joined to the word.
        For example: word = مدرس, suffix = ة, enclitic = ي.
        The suffix is converted to Teh.
        @param word: word found in dictionary.
        @type word: unicode.
        @param suffix: second level suffix.
        @type suffix: unicode.
        @param enclitic: first level suffix.
        @type enclitic: unicode.
        @param mankous: if the noun is mankous ends with Yeh منقوص.
        @type mankous: boolean.
        @return: variant of suffixes  (vocalized suffix and vocalized
        suffix without I'rab short mark).
        @rtype: (unicode, unicode)
        r   rA   N u
   متحركr-   )findr"   r$   resubrD   	is_harakarE   rC   KASRATANr0   r3   strip_lastharaka)r   r6   r5   mankousr(   	newsuffixsuffix_non_irab_marks          r   get_suffix_variantszmuwaled.get_suffix_variants   s    & 	;;r~&&!+++r~rvv>>II 	(f!5!5 	(BCCyRVRW---		 ( K	 34V<VDDD#%#6y#A#A  #, ...r   c                    | }|                      t          j        t          j        z             r|                    t          j                  r{|                     t          j        t          j        z   t          j                  }|                     t          j        t          j        z   t          j        t          j        z             } | |fS )u  
        Get the enclitix variant to be joined to the word.
        For example: word = كتاب, suffix = كسرة, enclitic = هم.
        The enclitic has a second form هِم.
        @param enclitic_voc: first level suffix vocalized.
        @type enclitic_voc: unicode.
        @param suffix_voc: second level suffix vocalized.
        @type suffix_voc: unicode.
        @return: variant of enclitic  (vocalized enclitic and vocalized
        enclitic without I'rab short mark).
        @rtype: (unicode, unicode)
        )
startswithr"   HEHDAMMAr!   KASRAreplace)enclitic_voc
suffix_vocencl_vo_no_inflect_marks      r   get_enclitic_variantzmuwaled.get_enclitic_variant
  s     #/""26BH#455 	V*:M:Mbh:W:W 	V&2&:&:26BH;Lbf&U&U#'//0A26BHCTUUL444r   c                 J   | }t          j        |          }t          j        |          }||z   }t          j        |          }|                    t           j                  r|                    t           j        t           j        z             r|t           j        t           j        t           j        z   t           j        t           j        z   t           j        z   fv r|dd         t           j        z   }n.|t           j        t           j        z   k    rt          j        |dd                   }n|dk    r|dd         t           j        z   }n|t           j        t           j        z   t           j        t           j        z   t           j        t           j        t           j        z   t           j        z   fv rt          j        |dd                   }nR|dk    r|dd         t           j        z   }n3|                    t           j                  r>|dk    r|dd         t           j        z   }n|dk    r|dd         t           j        z   }n|                    t           j	        t           j        z             r|d|vr&d|vr"|s |st          j        |dd                   }n|t           j        t           j
        z   t           j        t           j
        z   fv rt          j        |dd                   }n.|                    t           j                  r|s|r	|                    t           j                  r|dd         t           j        z   }n|                    t           j	                  r|dd         t           j        z   }n|                    t           j        t           j        z             s9|                    t           j        t           j        z   t           j        z             r6|                    t           j                  r|dd         t           j        z   }|S )u  
        Get the word variant to be joined to the suffix.
        For example: word = مدرسة, suffix = ي. The word is converted to مدرست.
        @param word: word found in dictionary.
        @type word: unicode.
        @param proclitic: proclitic ( first level).
        @type proclitic: unicode.
        @param suffix: suffix ( first level).
        @type suffix: unicode.
        @param enclitic: enclitic( second level).
        @type enclitic: unicode.
        @return: variant of word.
        @rtype: unicode.
        NrA   rQ   r   u   لل)r"   strip_tashkeelrW   r!   r$   rC   rE   rD   rH   r`   rG   r#   rI   r]   r_   	WAW_HAMZArJ   SUKUNFATHATAN)r   r*   r6   r5   	word_stemr)   r(   long_suffix_nms           r   get_word_variantzmuwaled.get_word_variant   s     	%f--	'11"[0'	22	 bn-- @	:!!"'BN":;; 4FFR^+FRW$rv-!  
 !*#2# 7II"'BF"222 " 3IcrcN C CII#r)) )#2# 7I"& ' 26)	   /	#2#??		  2%%%crcNRV3	00 $	: B%crcNRV3		""%crcNRW4	26 122 	:
 i'')++# ,! , /	#2#??		 rv/"'1ABBB/	#2#??	 )) 		:y 		:K 		:  ** :%crcNR\9		""28,, :%crcNR\9		""26BH#455:%%bfrx&7"(&BCC: ##BK00: &crcNR\9	r   c                    t           j        |         d         d         }t           j        |         d         d         }|                     ||          \  }}|}t	          j        |dd                   r
|dd         }|                    t          j        t          j                  }dt           j        |         d         v rst	          j	        |d                   rYd
                    |d         t          j        |dd         g          }|                    t          j                  r
|dd         }|                    t          j        t          j        t          j        z             }|                    t          j        t          j        t          j        z             }t!          j        d	t          j        z  t          j        |          }t!          j        d
t          j        z  d|          }|                    t          j        t          j        z             rdnd}	|                     ||||          }|                     ||||	          \  }}
d
                    |||
|g          }d
                    ||||g          }d
                    ||||g          }t	          j        |          }t           j        D ]1\  }}|                    ||          }|                    ||          }2|||fS )a  
        Join the  noun and its affixes, and get the vocalized form
        @param noun: noun found in dictionary.
        @type noun: unicode.
        @param proclitic: first level prefix.
        @type proclitic: unicode.

        @param suffix: second level suffix.
        @type suffix: unicode.
        @param enclitic: first level suffix.
        @type enclitic: unicode.
        @return: vocalized word.
        @rtype: unicode.
        	vocalizedr   rA   Nr/   r-   rQ   r   z(%s)+z^(%s)+TFr   )r0   r1   r2   re   r"   rU   ra   rk   FATHAis_sunr	   SHADDAr!   rj   rC   rH   rS   rT   r`   rE   rn   r[   rh   AJUST_VOCAL_PATTERNS)r>   nounr*   r6   r5   proclitic_vocrb   encl_voc_non_inflectrc   rX   rZ   word_non_irab_markword_vocalized	segmentedpatrnrepls                   r   vocalizezmuwaled.vocalize}  s     1)<[I!L 0:;GJ-1-F-F&.
 .
** 
 <RSS	"" 	9D ||BK22 34Y?GGGBIGM
 M
G 77DGRYQRR9::D%%bh// 3 -crc 2 ||BGRX%788 ||BORX-GHHvg("(D99vh)2t44 --26(9::E$$$$T9fhGG
 ,0+C+C*h,
 ,
(
(  WWD"68LM
 
 -z<!PQQHHmT:|LMM	%i00	3 	I 	IKE4+33E4@@N!3!;!;E4!H!H19<<r   c                 :   g }t           j        | v r|                    |                     t           j        t           j        dz                       |                    |                     t           j        t           j        t           j        z                        |S )zgenerate noun varaintes   )r"   
ALEF_MADDAappendra   ALEF_HAMZA_ABOVErI   rC   )ru   	noun_lists     r   get_noun_variantszmuwaled.get_noun_variants  su     	=D  T\\"-9Lq9PQQRRRT\\"-BG9KLLMMMr   c                 z   g }|r|                      t          j                  r+|                    | dd         t          j        z              nl|                      t          j                  r*|                    | dd         t          j        z              n"|                    | t          j        z              |S )zgenerate stem varaintesNrA   )r!   r"   rC   r   rH   rD   r$   rE   )rK   r(   	list_stems      r   get_input_stem_variantszmuwaled.get_input_stem_variants  s     	 	,}}RW%% =  crcR_!<====rv&& =  crcR^!;<<< TBF]+++r   c                 $      fd|D             S )ad  
        Verify possible affixes in the resulted segments according
        to the given affixes list.
        @param word: the input word.
        @type word: unicode.
        @param list_seg: list of word segments indexes (numbers).
        @type list_seg: list of pairs.
        @return: list of acceped segments.
        @rtype: list of pairs.
        c                 ~    g | ]9}d                      d|d                  |d         d         g          v 7|:S r   r   r
   s     r   r   z(muwaled.verify_affix.<locals>.<listcomp>  sS     
 
 
388T&AaD&\4!<,H#I#IZ#W#WA#W#W#Wr   r   r   s   ` `r   r   zmuwaled.verify_affix  s4    
 
 
 
 

 
 
 	
r   N)F)__name__
__module____qualname___muwaled__initstaticmethodrO   r[   re   rn   r}   r   r   r   r   r   r   r<   r<      s          
 , , \,\ &/ &/ &/ \&/P 5 5 \5* Z Z \Zx[= [= [=z   \   \$ 
 
 \
 
 
r   r<   )__doc__rS   pyarabic.arabyarabyr"   stem_noun_constr0   r   r+   r:   r<   r   r   r   <module>r      s     
			          Y Y Y4 4 4nN N NbY
 Y
 Y
 Y
 Y
 Y
 Y
 Y
 Y
 Y
r   