
    -i                     `   d Z ddlZddlZddlZddlmZ ddlmZ ddlZ	ddl
ZddlZddlZddlmZ ddlZddlZeZddlmZ ddlZddlmZ ddlmZ ej                            d           ej                            d	           d
Z d
Z! G d d          Z"d Z#d Z$e%dk    r e$             dS dS )z
    Arabic Tashkeel Class
    N)and_)count   )unknown_tashkeel)tashkeel_constz../libz../Fc                      e Zd ZdZd,dZd Zd Zd Zd Zd Z	d	 Z
d
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd-dZd Zd Zd  Z d! Z!d.d"Z"d# Z#d$ Z$d% Z%	 	 d/d'Z&d( Z'd) Z(d* Z)	 	 d0d+Z*d&S )1TashkeelClassz#
        Arabic Tashkeel Class
    Fc                 D   t          j        t           j                   t          j        t                    | _        d| _        d| _        d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        d| _        t           j                            |          | _        | j                                         t*          j                            |          | _        | j                                         d| _        t4          j                            |          | _        | j                            t<                     | j                            | j                   tA          j!        | j
                  | _"        tG          j$                    | _%        d S )N)leveli  TF)
cache_path)&loggingbasicConfigERROR	getLogger__name__loggerlimitenabled_last_markenabled_stat_tashkeelenabled_verboseenabled_show_collocation_markselect_by_score_enabledenabled_syntaxic_analysisenabled_ajust_vocalizationenabled_semantic_analysisallow_syntax_last_markqalsadianalexAnalexanalyzerdisable_allow_cache_usesylajoneanasynSyntaxAnalyzeranasyntsyntax_train_enabledasmaianasemSemanticAnalyzer	set_debugdebug	set_limitcollCollocationClasscollor   UnknownTashkeelunknown_vocalizer)selfmycache_paths     N/var/www/html/speakWrite/venv/lib/python3.11/site-packages/mishkal/tashkeel.py__init__zTashkeelClass.__init__1   su   '-0000'11 

 "& &*"  % .3* (-$ *.&
 +/' *.& '+#  ---FF 	--///  55<5PP,,... %*! l333NN 	&&&
+++ *4+MNN
 "2!A!C!C    c                 R    || _         | j                            | j                    dS )z;
        set the limit length of words to vocalize
        N)r   r    r,   )r2   r   s     r4   r,   zTashkeelClass.set_limit   s)     

+++++r6   c                     d| _         dS )z1
        Enable the stat tasheel option.
        TNr   r2   s    r4   enable_stat_tashkeelz"TashkeelClass.enable_stat_tashkeel   s     &*"""r6   c                 P    d| j         _        | j                                         dS )zQ
        Enable the syntaxique relation extraction from a vocalized text
        TN)r%   r&   r    enable_fully_vocalized_inputr:   s    r4   enable_syn_trainzTashkeelClass.enable_syn_train   s*     -1) 	2244444r6   c                     d| _         dS )z2
        disable the stat tasheel option.
        FNr9   r:   s    r4   disable_stat_tashkeelz#TashkeelClass.disable_stat_tashkeel   s     &+"""r6   c                     d| _         dS )z
        enable logger
        TN)r   r:   s    r4   enable_verbosezTashkeelClass.enable_verbose   s      $r6   c                 j    | j                                          | j                                         dS )z9
        disable the morphological use of cache.
        N)r    r!   r%   r:   s    r4   disable_cachezTashkeelClass.disable_cache   s2     	--///,,.....r6   c                 j    | j                                          | j                                         dS )z8
        enable the morphological use of cache.
        N)r    enable_allow_cache_user%   r:   s    r4   enable_cachezTashkeelClass.enable_cache   s2     	,,...++-----r6   c                     | j         S )z
        return the  the stat tasheel option value.
        @return: True if enabled, false else.
        @rtype: boolean.
        r9   r:   s    r4   get_enabled_stat_tashkeelz'TashkeelClass.get_enabled_stat_tashkeel   s     ))r6   c                 F    d| _         | j                                         dS )z>
        Enable the show the collocation mark option.
        TN)r   r/   enable_show_delimiterr:   s    r4   enable_show_collocation_markz*TashkeelClass.enable_show_collocation_mark   s&     .2*
((*****r6   c                 F    d| _         | j                                         dS )z?
        disable the show the collocation mark option.
        FN)r   r/   disable_show_delimiterr:   s    r4   disable_show_collocation_markz+TashkeelClass.disable_show_collocation_mark   s&     .3*
))+++++r6   c                     | j         S )z
        return the  the show the collocation mark option value.
        @return: True if enabled, false else.
        @rtype: boolean.
        )r   r:   s    r4   get_show_collocation_markz'TashkeelClass.get_show_collocation_mark   s     11r6   c                     d| _         dS )z.
        Enable the last mark option.
        TNr   r:   s    r4   enable_last_markzTashkeelClass.enable_last_mark   s     "&r6   c                     d| _         dS )z=
        disable the last mark vocalization  option.
        FNrS   r:   s    r4   disable_last_markzTashkeelClass.disable_last_mark   s     "'r6   c                     | j         S )z
        return the  the last mark vocalization option value.
        @return: True if enabled, false else.
        @rtype: boolean.
        rS   r:   s    r4   get_enabled_last_markz#TashkeelClass.get_enabled_last_mark   s     %%r6   c                     d| _         dS )z6
        Enable the syntaxic analysis option.
        TNr   r:   s    r4   enable_syntaxic_analysisz&TashkeelClass.enable_syntaxic_analysis        *.&&&r6   c                     d| _         dS )z7
        disable the syntaxic analysis option.
        FNrZ   r:   s    r4   disable_syntaxic_analysisz'TashkeelClass.disable_syntaxic_analysis        */&&&r6   c                     | j         S )z
        return the  the syntaxic analysis option value.
        @return: True if enabled, false else.
        @rtype: boolean.
        rZ   r:   s    r4   get_enabled_syntaxic_analysisz+TashkeelClass.get_enabled_syntaxic_analysis        --r6   c                     d| _         dS )z6
        Enable the Semantic analysis option.
        TNr   r:   s    r4   enable_semantic_analysisz&TashkeelClass.enable_semantic_analysis   r\   r6   c                     d| _         dS )z7
        disable the Semantic analysis option.
        FNrd   r:   s    r4   disable_semantic_analysisz'TashkeelClass.disable_semantic_analysis   r_   r6   c                     | j         S )z
        return the  the Semantic analysis option value.
        @return: True if enabled, false else.
        @rtype: boolean.
        rd   r:   s    r4   get_enabled_semantic_analysisz+TashkeelClass.get_enabled_semantic_analysis  rb   r6   c                     d| _         dS )z7
        Enable the Ajust Vocalization option.
        TNr   r:   s    r4   enable_ajust_vocalizationz'TashkeelClass.enable_ajust_vocalization
  s     +/'''r6   c                     d| _         dS )z8
        disable the Ajust Vocalization option.
        FNrk   r:   s    r4   disable_ajust_vocalizationz(TashkeelClass.disable_ajust_vocalization  s     +0'''r6   c                     | j         S )z
        return the  the Ajust Vocalization option value.
        @return: True if enabled, false else.
        @rtype: boolean.
        rk   r:   s    r4   get_enabled_ajust_vocalizationz,TashkeelClass.get_enabled_ajust_vocalization  s     ..r6   c           	         g }| j                             |          }|D ]7}| j                            dt	          t          |                               8|                                 r|| j                            |          \  }}|D ]}| j                            dt	          t          |                               |D ]v}| j                            dt	          |	                                                     | j                            dt	          |
                                                     w|                                 r| j                            |          }|D ]{}|D ]v}| j                            dt	          |	                                                     | j                            dt	          |
                                                     w|||fS )z
        Do the lexical, syntaxic  and semantic analysis of the text.
        @param text: input text.
        @type text: unicode.
        @return: syntaxic and lexical tags.
        rtype: list of list of stemmedSynWord class.
        zSyn len before syntax: %szSyn len: %szSyn sem next before: %szSyn next before: %szSyn sem next: %szSyn next: %s)r    
check_textr   r+   reprlenra   r%   analyzeget_sem_nextget_nextri   r(   )r2   textresultsyndl
synodelistsynds         r4   full_stemmerzTashkeelClass.full_stemmer  s    ))$//  	L 	LEK9$s5zz:J:JKKKK--// 	L!%!5!5f!=!=FJ ! S S!!-SZZ0@0@AAA! S SDK%%&?TEVEVEXEX@Y@YZZZK%%&;D<Q<QRRRRS 1133 5,,V44  L L! L LDK%%&8d>O>O>Q>Q9R9RSSSK%%nT$--//5J5JKKKKL z!!r6   rx   c           
      
   |                      |          }|                                 r0|                     |          }| j                            d|           |g}d}d}g }g }g }	t
          j                                        }
|D ]}|                     |          \  }}d}d}d}d}d}d}| j                            d           t          t          t                      ||                    D ]l\  }}}|dz
  dk    r||dz
           }nd}|dz   t          |          k     r||dz            }nd}|r|r||         |         }|                     |||||           mt          t          t                      ||                    D ]\  }}}|dz
  dk    r||dz
           }nd}|dz   t          |          k     r||dz            }nd}|r|r||         |         }|                     ||||||          }|r|                    |          }nd}|}||         }|}|}|}|                    |           |                                }|	                    |           ֐g }d}d}t%          |          D ]\  }}||                                         }|s||                                         }||                                         }d	                    ||                                         ||                                         g          }||                                                             d	          }|                    ||                                                             d	                     |
                    |           | j        rA| j                            d
||                                         t?          |
                     dt?          |
          d|
                                 ddz   |z   }|dk    r4||         !                    ||         "                                          } nd} tF          j$        j%        &                    | d          } ||         '                                }!tP          r|!dk    r|}| )                                s,d                    || *                    ||          g          }n+d                    || *                    ||          g          }|                    ||d                    |	|                   || |!d           |}| +                                r| ,                    |          }|r| -                    |          }|S |S )z
        Vocalize the text and give suggestion to improve tashkeel by user.
        @param text: input text.
        @type text: unicode.
        @return: vocalized text.
        rtype: dict of dict or text.
        zstat tashkeel %s NFzList of synodes **********r   r   :z0TaharZe Tags to display '%s' and tagmaker =='%s'[z]{}z<br/>    ;)chosensemisuggestinflectlinkrule).pre_tashkeelrI   stat_tashkeelr   r+   mysamtagcodertagCoderr}   listizipr   rt   _TashkeelClass__reduce_cases_TashkeelClass__choose_tashkeelget_next_relationappendget_vocalizeds	enumerateget_vocalizedget_wordget_semivocalizedjoinget_typeget_tags_to_displayget_tagssplitextendencoder   infostrr   get_previous_relation	get_orderr"   	syn_constDISPLAY_RELATIONgetget_ruleUNCERTAIN_TASHKEELrX   displayrp   _ajust_vocalized_resultajust_vocalized_suggestion)"r2   	inputtext
suggestionformat_displaytextsvocalized_textpreviousoutput_suggest_list_chosen_listsuggests_list
mytagmakerrx   detailled_syntaxr{   	next_nodepre_nodeprevious_indexprevious_case_indexprevious_chosen_relationcurrent_indexword_cases_listcurrent_synodecurrent_chosen_case_indexcurrent_chosen_relationcurrent_chosenr   privous_orderivoc_wordsemivocalizedr   tagslistrelationselection_rules"                                     r4   tashkeelzTashkeelClass.tashkeelC  sF    %%i00	
 ))++ 	=**955IK0)<<<  ^,,..
 M	. M	.D+/+<+<T+B+B(j HIH"N"'',$ K:;;;CGUWWUegqHrHrCsCs  > 1$)))-/:HH#H 1$s+;'<'<<< *=? ;II $I! U&9 U/?@STH###^h	    DHUWWUegqHrHrCsCs '. '.> 1$)))-/:HH  $H 1$s+;'<'<<< *=1+< =II $I! U&9 U/?@STH,0,B,B#^Xi)A- -)
  4.6.H.HIb.c.c++.3+ ,C( "11J!K!.&?#)##N333 )7799$$W----O'.P !!*<!8!8 .	 .	A~#A4466H 6'?3355(O==??Miia!9!9!;!;\!_=`=`=b=b cddG#A//1177<<HOOLO4466<<SAABBBh''' # B  !SUabcUdUmUmUoUoqtu  rA  rA  B  B  B%(____j6H6H6J6J6J6JKgUX__G1}}'?@@hAWAaAaAcAcdd):>>xLLH)!_5577N " )nr&9&9(--// e!%NDLLXf<g<g+h!i!i "&NDLLSa<b<b+c!d!d&&"%99]1%566" &( (    HH
 ..00 	J!99.IIN 	""&"A"ABU"V"V&&!!r6   c                 2    |                      |dd          S )z
        Vocalize the text and give suggestion to improve tashkeel by user.
        @param text: input text.
        @type text: unicode.
        @return: vocalized text.
        rtype: dict of dict.
        Thtmlr   r   r   r2   rx   s     r4   tashkeel_ouput_html_suggestz)TashkeelClass.tashkeel_ouput_html_suggest  s     }}Td6}JJJr6   c                 2    |                      |dd          S )z
        Vocalize the text witthout suggestion
        @param text: input text.
        @type text: unicode.
        @return: vocalized text.
        rtype: text.
        Frx   r   r   r   s     r4   tashkeel_output_textz"TashkeelClass.tashkeel_output_text  s     }}TeF}KKKr6   c                 B   t          j        dd|          }t          j        dd|          }t          j        dd|          }t          j        dt          j        z  dt          j        z  |          }t          j        d	d
|          }t          j        dd|          }|S )u  
        Ajust the resulted text after vocalization to correct some case
        like 'meeting of two queiscents = ألتقاء الساكنين'
        @param text: vocalized text
        @type text: unicode
        @return: ajusted text.
        @rtype: unicode
        u   \sمِنْ\s+اu    مِنَ اu   \sمَنْ\s+اu    مَنِ اu   \sعَنْ\s+اu    عَنِ اu	   \s%s\s+اu    %s اu    ([.?!, :)”—]($| ))u   \sبْنُ\su    بْن )resubarabySUKUNKASRAr   s     r4   r   z%TashkeelClass._ajust_vocalized_result  s     v(/4@@ v(/4@@v(/4@@vl5;.	EK0GNN v0%>> vo{D99 r6   c                 r   t          t          |          dz
            D ]}|dz   t          |          k     rd||dz            v r||dz            d                             t          j                  r||         d         dv rZ||         d         dk    rd||         d<   ||         d         dk    rd||         d<   ||         d         dk    rd	||         d<   ||         d                             t          j                  r,||         d         d
d         t          j        z   ||         d<   |S )u1  
        Ajust the resulted text after vocalization to correct some case
        like 'meeting of two queiscents = ألتقاء الساكنين'
        @param text: _suggest_list
        @type text: list of dict of unicode
        @return: _suggest_list.
        @rtype: list of dict of unicode
        r   r   )   مَنْ   مِنْ   عَنْr   u   مِنَr   u   عَنِr   u   مَنِNr   )rangert   
startswithr   ALEFendswithr   r   )r2   _suggest_listr   s      r4   r   z(TashkeelClass.ajust_vocalized_suggestion  s]    s=))!+,, 	_ 	_AsS''''Xqs9K-K-Kac"8,77
CC .L !#H-1XXX(+H5DD9DM!,X66*1-h7;FF9DM!,X66*1-h7;FF9DM!,X6"1%h/88EE _1>q1A(1KCRC1PSXS^1^M!$X. r6   c                 T    |                                 }|dk    rd|z  S |dk    r|S |S )z
        format the vocalized word to be displayed on web interface.
        @param word: input vocalized word.
        @type word: unicode.
        @return: html code.
        rtype: unicode.
        r   z0<span id='vocalized' class='vocalized'>%s</span>rx   )lower)r2   wordr   s      r4   r   zTashkeelClass.display6  s@     (--//V##FMMv%%KKr6   c                     |                      |          }d}|D ]2}|D ]-}|                                }d                    ||g          }.3|S )z
        Vocalize the text.
        @param text: input text.
        @type text: unicode.
        @return: vocalized text.
        rtype: unicode.
        r   r   )r}   r   r   )r2   rx   r   r   word_analyze_listitemvocs          r4   assistanttashkeelzTashkeelClass.assistanttashkeelF  sy      ,,T22!1 	B 	B * B B((**!%NC+@!A!AB r6   c                    t          t          j                                                  D ].}t	          j        d|z  dt          j        |         z  |          }/| j                            |          }t          j	        
                    |          }| j                            dd                    |                     t          |          t          |          k    r.| j                            dd                    |                     t          j                            |          }| j                            dd                    |                     d                    |          S )z
        Vocalize the text by evident cases and by detecting numbers clauses
        @param text: input text.
        @type text: unicode.
        @return: statisticlly vocalized text.
        rtype: unicode.
        z\s%s\sz %s zpre_tashkeel numbers %sr   znb %s+znmd %s)r   r   CorrectedTashkeelkeysr   r   r    tokenizepyarabicnumberpre_tashkeel_numberr   r+   r   rt   namedpretashkeel_named)r2   rx   abrwordlistprevocalized_lists        r4   r   zTashkeelClass.pre_tashkeelZ  s9    8==??@@ 	9 	9C6)C-,S1*2379 9DD=))$//$O??II3TYY?P5Q5QRRR  !!S]]22Kgtyy1B'C'CDDD$N<<=NOO(DII.?$@$@AAAyy*+++r6   c                     | j                             |          }| j                            |          }| j                             |          \  }}d                    |          S )z
        Vocalize the text by statistical method according to the collocation dictionary
        @param text: input text.
        @type text: unicode.
        @return: statisticlly vocalized text.
        rtype: unicode.
        r   )r/   lookup4long_collocationsr    r   lookupr   )r2   rx   r   newlisttaglists        r4   r   zTashkeelClass.stat_tashkeelu  s^     z22488 =))$//  :,,X66yy!!!r6   Nc                    d}d}d}	|}
|
rd}nd}|r|                                 }nd}|                                 }g }t          |          dk    rjd                                         rNd                             | j                            d                                                              d}	nd}	|	st          |fd          }|	sY|
r|
                                rC| 	                    |||          \  }}	| j
                            d	t          |                     |	sm| j
                            d
t          |                     |                     ||
|||          \  }}	| j
                            dt          |                     |	sA|                     |          \  }}	| j
                            dt          |                     |d         }|         }|	sd}	t          r;| j
                            d|	d                    fd|D                                  |                    |	           |                                S )a  
        Choose a tashkeel for the current word, according to the previous one.
        A new algorithm
        @param caselist: list of steming result of the word.
        @type caselist: list of stemmedSynword
        @param: the choosen previous word stemming.
        @type previous_chosen_case:stemmedSynwordhg
        @return: the choosen stemming of the current word.
        @rtype:stemmedSynword.
        NFr   r   f   e   c                 8    |                                           S Nget_freqxcaselists    r4   <lambda>z1TashkeelClass.__choose_tashkeel.<locals>.<lambda>  s    hqk6J6J6L6L r6   )keyz2:%sz2-1:%sz3: %sz4: %sd   z
100: %d %s, c                 D    g | ]}|                                          S  r   .0r  r	  s     r4   
<listcomp>z3TashkeelClass.__choose_tashkeel.<locals>.<listcomp>  s*    =l=l=l^_hqk>W>W>Y>Y=l=l=lr6   )get_chosen_indexesrt   
is_unknownset_vocalizedr1   r   r   sorted
is_initial_TashkeelClass__choose_initialr   r+   rs   _TashkeelClass__choose_cases_TashkeelClass__choose_defaultr   set_ruler   )r2   r	  r   previous_chosen_caser   r   r   r   chosen_indexr   r   pre_relationnext_chosen_indexesindxlisttmplists    `             r4   __choose_tashkeelzTashkeelClass.__choose_tashkeel  s    ' 	LLL 	'"+">">"@"@"& "4466x==A{%%'' ))$*@*G*GQRH\H\H^H^*_*_``` 	Nh,L,L,L,LMMMH 	6X 	6)<)<)>)> 	6!228XH[]fggNHdKfd8nn555  	7KhX777!008XObdm  pH  I  INHdKgtH~~666  	8!228XFFNHdKgX777  {,'CT 	oKlD%**=l=l=l=lck=l=l=l2m2mnnn 	!!!r6   c           	         d}|s|                                  r| j                            dt          |                     fd|D             }t	          t          |          |d          \  }}| j                            dt          |                     | j                            dt          |                     |sF|                                 r1| j                            dt                               |D ]j}| j                            d||                                                    | j                            d|t          |         j                             k| j                            dt          |                     fd	|D             }| j                            d
t          |                     t	          ||d          \  }}| j                            dt          |                     t          t          fd|                    }| j                            dt          |                     t	          ||d          \  }}||fS )aQ  
        Select the initial case to start a sentence
        @param caselist: list of steming result of the word.
        @type caselist: list of stemmedSynword
        @param indxlist: list of index.
        @type indxlist: list of integer
        @return: indexlist and rule.
        @rtype:( list of integer, integer/False) .
        Fz10 %sc                 J    g | ]}|                                        | S r  )has_sem_nextr  r  r	  r   s     r4   r  z2TashkeelClass.__choose_initial.<locals>.<listcomp>  s1    \\\Qhqk.F.FGZ.[.[\q\\\r6      z11 %sz12a %s next_chosenz12b [%d] : %s nextsz12e %s indxlistc                 J    g | ]}|                                        | S r  has_nextr'  s     r4   r  z2TashkeelClass.__choose_initial.<locals>.<listcomp>  s1    XXXQhqk.B.BCV.W.WXqXXXr6   z12c %s indxlist   z12 %s indxlistc                     |                                           pN|                                          o|                                           p|                                          S r  )is_stopword
is_marfou3
is_passiveis_pastr  s    r4   r
  z0TashkeelClass.__choose_initial.<locals>.<lambda>  sb    !)@)@)B)B **1+((**K8A;3I3I3K3K/K**QK'')) r6   z13 %s   )
ri   r   r+   rs   _get_indexlist_and_ruler   ra   rw   nextfilter)r2   r!  r	  r   r   r   r"  r  s     ``    r4   __choose_initialzTashkeelClass.__choose_initial  sj     	7::<< 	7KgtH~~666\\\\\(\\\G4T']]HhXYZZNHdKgtG}}555KgtH~~666 	@::<< 	@K2D9L4M4MNNN S S!!"78A;;O;O;Q;QRRR!!"74@P;Q;QRRRRK/h@@@XXXXX(XXXGK/g??? 5WhRTUUNHdK.X??? v , , , ,-57 7 8 8 	'4>>2220(HcRR$$r6   c                 H    d}|s                                  r j                            dt          |                     t	          t           fd|                    }t          |||          \  }} j                            dt          |                     t	          t           fd|                    } j                            dt          |                     t          ||d          \  }}t	          t           fd|                    } j                            d	t          |                     t          ||d
          \  }}t	          t           fd|                    } j                            dt          |                     t          ||d          \  }}|s]t	          t          fd|                    }t          ||d          \  }} j                            dt          |                     |s`t	          t           fd|                    } j                            dt          |                     t          ||d          \  }}|s^t	          t          fd|                    } j                            dt          |                     t          ||d          \  }}|st                                 r`t	          t           fd|                    } j                            dt          |                     t          ||d          \  }}|ss                                 r_t	          t           fd|                    } j                            dt          |                     t          ||d          \  }}|st                                 r`t	          t           fd|                    } j                            dt          |                     t          ||d          \  }}|st                                 r`t	          t           fd|                    }t          ||d          \  }} j                            d t          |                     |sr                                 r^t	          t          fd!|                    } j                            d"t          |                     t          ||d#          \  }}|ss                                 r_t	          t           fd$|                    } j                            d%t          |                     t          ||d&          \  }}|su|r|                                s|s]t	          t          fd'|                    } j                            d(t          |                     t          ||d)          \  }}||fS )*aY  
        Select the case  in the middle, when it is possible
        @param caselist: list of steming result of the word.
        @type caselist: list of stemmedSynword
        @param indxlist: list of index.
        @type indxlist: list of integer
        @return: indexlist and rule.
        @rtype:( list of integer, integer/False) .
        r   z14-a %sc                 |    j                             |                    p|                                        S r  r(   
is_relatedr&  r  r	  r   r   r2   s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>  sH    DK,B,B8XVW[,Y,Y  -K]efg]h]u]u  wJ  ^K  ^K r6   z14-b %sc                 |    j                             |                    o|                                        S r  r9  r;  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>   sH    DK,B,B8XVW[,Y,Y  -L^fgh^i^v^v  xK  _L  _L r6   z14-c %s   c                 |    j                             |                    o|                                        S r  )r(   r:  r+  r;  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>(  sH    DK,B,B8XVW[,Y,Y  -H^fgh^i^r^r  tG  _H  _H r6   z15 %s   c                 |    j                             |                    o|                                        S r  )r%   r:  r&  r;  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>0  sH    DL,C,CHhWXk,Z,Z  -M_ghi_j_w_w  yL  `M  `M r6   z16 %sr(  c                 8    |                                           S r  )r.  r  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>8  s    Xa[-D-D-F-F r6      c                     |                                           o;j                            |                    o|                                        S r  )r.  r%   r:  r+  r;  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>B  sY    Xa[-D-D-F-F .TDLLcLcdlnvwxnyLzLz MS)1!)=)=>Q)R)R r6   z17 %s)   c                 n    |                                           o|                                        S r  r.  r+  r  r	  r   s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>J  s0    Xa[-D-D-F-F-t8TU;K_K_`sKtKt r6   z18 %s
   c                 |    j                             |                    o|                                        S r  )r%   r:  r+  r;  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>\  sH    T\-D-DXxXY{-[-[  .J`hij`k`t`t  vI  aJ  aJ r6   z19 %s   c                 F    j                             |                    S r  )r%   r:  r  r	  r   r2   s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>b  s    T\-D-DXxXY{-[-[ r6   z20 %s   c                     j                             |                    o-j                                                 |                     S r  r%   r:  are_compatible_relationsr   r  r	  r   r   r2   s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>j  w    T\-D-DXxXY{-[-[  .`d`l  aF  aF  G_  ai  a{  a{  |}  a~  a~  a  a r6   z21 %s   c                     j                             |                    o-j                                                 |                     S r  rO  rQ  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>{  rR  r6   z22 %sc                 :    |                                         S r  r*  rG  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>  s    HQK,@,@AT,U,U r6   z23 %sr,  c                     j                             |                    o4|                                          o|                                           S r  )r%   r:  is_noun
is_mansoubrL  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>  sw    T\-D-DXxXY{-[-[  .W`hij`k`s`s`u`u  .W  G  HI  J  U  U  W  W  {W r6   z24 %s   c                 n    |                                           p|                                           S r  )	is_tanwinrW  r  s    r4   r
  z.TashkeelClass.__choose_cases.<locals>.<lambda>  s0    0E0E0G0G0dxXY{ObObOdOdKd r6   z25 %s   )	ri   r   r+   rs   r   r5  r3  ra   is_break)	r2   r!  r	  r   r   r   r   r   r"  s	   ` ``` `  r4   __choose_caseszTashkeelClass.__choose_cases
  s     	U::<< 	UKih888 6  #K  #K  #K  #K  #K  #K  #K  MU  V  V  W  WG 5WhRVWWNHdKih8886  #L  #L  #L  #L  #L  #L  #L  NV  W  W  X  XGKih8884WhRSTTNHd 6  #H  #H  #H  #H  #H  #H  #H  JR  S  S  T  TGKgtH~~6664WhRSTTNHd 6  #M  #M  #M  #M  #M  #M  #M  OW  X  X  Y  YGKgtH~~6664WhRSTTNHd  	76"F"F"F"FQQRRG 5WhRSTTNHdKgtH~~666
  	V6 #T #T #T #T #T #T #TV^` ` a aG KgtH~~6664WhRTUUNHd 	V6"t"t"t"t"tw  A  A  B  BGKgtH~~666 5WhRTUUNHd  	V::<< 	V6  #K  #K  #K  #K  #K  #K  #K  MU  V  V  W  WGKgtH~~6664WhRTUUNHd 	V;;== 	V6"\"\"\"\"\"\^fgghhG KgtH~~6664WhRTUUNHd 	V;;== 	V6  #A  #A  #A  #A  #A  #A  #A  CK  L  L  M  MGKgtH~~6664WhRTUUNHd  	7;;== 	76  #A  #A  #A  #A  #A  #A  #A  CK  L  L  M  MG 5WhRTUUNHdKgtH~~666  	V::<< 	V6"U"U"U"U"UW_``aaGKgtH~~6664WhRTUUNHd  	V::<< 	V6  #X  #X  #X  #X  #X  #X  Zb  c  c  d  dGKgtH~~6664WhRTUUNHd 	Z Zi0022 Z9 Zv&d&d&d&dgoppqq!!'4>>::: "9(HVX!Y!Y$~r6   c                 H   dt          fd|D                       t          t          fd|                    }t          ||d          \  }}|s5t          t          fd|                    }t          ||d          \  }}|s5t          t          fd|                    }t          ||d          \  }}|s5t          t          fd	|                    }t          ||d
          \  }}|s5t          t          fd|                    }t          ||d          \  }}|s5t          t          fd|                    }t          ||d          \  }}|s5t          t          fd|                    }t          ||d          \  }}||fS )aW  
        Select default cases when all others methods fail
        @param caselist: list of steming result of the word.
        @type caselist: list of stemmedSynword
        @param indxlist: list of index.
        @type indxlist: list of integer
        @return: indexlist and rule.
        @rtype:( list of integer, integer/False) .
        r   c                 D    g | ]}|                                          S r  r  r  s     r4   r  z2TashkeelClass.__choose_default.<locals>.<listcomp>  s)    @@@!x{++--@@@r6   c                 @    |                                           k    S r  r  )r  r	  maxfreqs    r4   r
  z0TashkeelClass.__choose_default.<locals>.<lambda>  s    (<(<(>(>'(I r6      c                 n    |                                           o|                                           S r  )rW  is_dualr  s    r4   r
  z0TashkeelClass.__choose_default.<locals>.<lambda>  s0    hqk.A.A.C.C.bXVW[M`M`MbMbIb r6       c                 l    |                                           o|                                          S r  )rW  rX  r  s    r4   r
  z0TashkeelClass.__choose_default.<locals>.<lambda>  s-    hqk.A.A.C.C.aRSI_I_IaIa r6   !   c                 n    |                                           o|                                           S r  )is_verbr0  r  s    r4   r
  z0TashkeelClass.__choose_default.<locals>.<lambda>  s0    hqk.A.A.C.C.dHUVKLbLbLdLdHd r6   #   c                     |                                           o3|                                          p|                                          S r  )rj  r/  r1  r  s    r4   r
  z0TashkeelClass.__choose_default.<locals>.<lambda>  sE    hqk.A.A.C.C.{RSI_I_IaIaIzemnoepexexezez r6   $   c                 l    |                                           o|                                          S r  )rj  is3rdpersonr  s    r4   r
  z0TashkeelClass.__choose_default.<locals>.<lambda>  -    hqk.A.A.C.C.aQRH_H_HaHa r6   %   c                 l    |                                           o|                                          S r  )rj  is1stpersonr  s    r4   r
  z0TashkeelClass.__choose_default.<locals>.<lambda>  rp  r6   &   )maxr   r5  r3  )r2   r!  r	  r"  r   rb  s     `  @r4   __choose_defaultzTashkeelClass.__choose_default  s    @@@@x@@@AAvIIIII8TTUU0(HbQQ$ 	V 6"c"c"c"cemnnooG 5WhRTUUNHd 	V6"b"b"b"bdlmmnnG 5WhRTUUNHd  	V6"e"e"e"egoppqqG 5WhRTUUNHd 	V6"|"|"|"|  G  H  H  I  IG4WhRTUUNHd 	V6"b"b"b"bdlmmnnG4WhRTUUNHd 	V6"b"b"b"bdlmmnnG4WhRTUUNHd~r6   c                   
 d}d}|r|                                 
nd
|                                 }g }	d}|r|r|                                r|	r|	}|rt          t          
fd|                    }	|	r|	}t	          |          dk    rd}|rCt          d|d                    fd|D                                           d	                     |                    |           dS )
a  
        reduce  possible cases for the current word, according to the previous one and nexts.
        A new algorithm
        @param: list of steming result of the word.
        @type caselist: list of stemmedSynword
        @param: the choosen previous word stemming.
        @type previous_chosen_case:stemmedSynwordhg
        @return: the choosen stemming of the current word.
        @rtype:stemmedSynword.
        r   FNc                 n    |                                           o|                                        S r  rF  rG  s    r4   r
  z.TashkeelClass.__reduce_cases.<locals>.<lambda>  s2    Xa[-D-D-F-F .HUVKL`L`atLuLu r6   r   rB  r  c                 D    g | ]}|                                          S r  r  r  s     r4   r  z0TashkeelClass.__reduce_cases.<locals>.<listcomp>#  s*    /^/^/^PQ0I0I0K0K/^/^/^r6   utf8)	r  r]  r   r5  rt   printr   r   set_chosen_indexes)r2   r	  r   r  r   r   r   r+   r!  r"  r   s    `        @r4   __reduce_caseszTashkeelClass.__reduce_cases  sM     	'"+">">"@"@"& "4466  	#Y 	#9+=+=+?+? 	#
  #"  		p6 # # # # ##% % & &G  1"x==A%%qoeAdEJJ/^/^/^/^U]/^/^/^$_$_$f$fgm$n$nooo 	))(33333r6   )F)Frx   )rx   )NNNF)NNN)+r   
__module____qualname____doc__r5   r,   r;   r>   r@   rB   rD   rG   rI   rL   rO   rQ   rT   rV   rX   r[   r^   ra   re   rg   ri   rl   rn   rp   r}   r   r   r   r   r   r   r   r   r   r   r  r  r  r   r  r6   r4   r	   r	   -   s        JD JD JD JD\, , ,* * *5 5 5+ + +$ $ $/ / /. . .* * *+ + +, , ,2 2 2& & &' ' '& & &. . ./ / /. . .. . ./ / /. . ./ / /0 0 0/ / /#" #" #"Je" e" e" e"NK K KL L L  8  8      (, , ,6" " "4 >B@EJ" J" J" J"X- - -`S S Sj@ @ @L >B!%C4 C4 C4 C4 C4 C4r6   r	   c           	          d}| r| }t          |          dk    r|nd}|rDt          d||d                    fd|D                                           d                     ||fS )z
    Just a macro to avoid repetetion
    if tmplist is not empty, change indexlist,
    if the indexlist has one element, the rule is applyed
    Fr   r   zchoose tashkeelr  c                 D    g | ]}|                                          S r  r  r  s     r4   r  z+_get_indexlist_and_rule.<locals>.<listcomp>8  s*    ?o?o?o`a@Y@Y@[@[?o?o?or6   rz  )rt   r{  r   r   )r"  	indexlistr	  r   r+   applied_rules     `   r4   r3  r3  *  s     E 	 y>>Q..44AL AlEJJ?o?o?o?oen?o?o?o4p4p4w4wx~44  	A  	A  	Al""r6   c                      t          d           t                      } d}|                     |          }t          |                    d                     dS )z
    main test
    testu%   يعبد الله تطلع الشمسrz  N)r{  r	   r   r   )	vocalizerrx   r   s      r4   mainlyr  <  sR     
&MMMI 4D


T
"
"C	#**V

r6   __main__)&r  sysr   r   operatorr   pyarabic.arabyr   qalsadi.analexr   sylajone.anasynr"   sylajone.syn_constasmai.anasemr'   maskouk.collocationscollocationsr-   pyarabic.numberr   pyarabic.namedzipr   	itertoolsr   mysam.tagcoderr   r   r   r   pathr   r   r+   r	   r3  r  r   r  r6   r4   <module>r     s    


 				                              # # # # # #                                        z4 z4 z4 z4 z4 z4 z4 z4z# # #$
 
 
 z
FHHHHH r6   