
    -iD                         d Z edk    r8ddlZej                            d           ej                            d           ddlmZ ddlZ	 G d d	          Z
d
 Zedk    r e             dS dS )z0
A class to remove ambiguation in text analysis
__main__    Nz../z
../support   )disambig_constc                       e Zd ZdZd Zd Zed             Zed             Zed             Z	ed             Z
ed             Zed	             Zed
             ZdS )Disambiguatorz8
    A class to remove ambiguation in text analysis
    c                 L    t           j                                        | _        d S )N)	naftawayhwordtag
WordTaggertagger)selfs    N/var/www/html/speakWrite/venv/lib/python3.11/site-packages/qalsadi/disambig.py__init__zDisambiguator.__init__"   s      '2244    c                    |r t          |          t          |          k    r|S g }t          t          ||                    }t          |          D ]$\  }}|d         }|                     |          r|dz
  dk    r||dz
           d         }nd}|                     ||          }	|	|k    r|	}n|dz   t          |          k     r||dz            d         }
| j                            |
          r+|                     |          r| 	                    |          }nD| j        
                    |
          r*|                     |          r|                     |          }|                    |           &|S )a}  
        Disambiguate some word according to tag guessing to reduce cases.
        return word list with dismbiguate.
        @param word_list: the given word lists.
        @type word_list: unicode list.
        @param tag_list: the given tag lists, produced by naftawayh
        @type tag_list: unicode list.
        @return: a new word list
        @rtype: unicode list
        r   r    )lenlistzip	enumerateis_ambiguousget_disambiguated_by_prev_wordr   is_verb_tagis_disambiguated_by_next_verbget_disambiguated_by_next_verbis_noun_tagis_disambiguated_by_next_nounget_disambiguated_by_next_nounappend)r   	word_listtag_listnewwordlistwordtaglistir
   currentwordpreviouswordtmpwordnexttags              r   disambiguate_wordsz Disambiguator.disambiguate_words'   s     )	C	NNc(mm;;Ks9h7788K'44 !0 !0
7%aj$$[11 1uzz'21q5'9!'<')"AA#\ G +--&-Q[!1!111"-a!e"4Q"7
  ;22#  "@@MM +/*M*M ++ +KK "[44#  "@@MM +/*M*M ++ +K "";////r   c                     | t           j        v S )ztest if the word is an ambiguous case
        @param word: input word.
        @type word: unicode.
        @return : if word is ambiguous
        @rtype: True/False.
        )dconstDISAMBIGUATATION_TABLEwords    r   r   zDisambiguator.is_ambiguous`   s     v444r   c                     t           j                            | i                               di                               d|           S )a  get The disambiguated form of the word by the next word is noun.
        The disambiguated form can be fully or partially vocalized.
        @param word: input word.
        @type word: unicode.
        @return : if word is ambiguous
        @rtype: True/False.
        noun	vocalizedr+   r,   getr-   s    r   r   z,Disambiguator.get_disambiguated_by_next_nounj   ;     )--dB77S__Sd##	
r   c                     t           j                            | i                               di                               ||           S )aT  get The disambiguated form of the word by the previous.
        The disambiguated form can be fully or partially vocalized.
        @param word: input word.
        @type word: unicode.
        @param previous: input previous word.
        @type previous: unicode.
        @return : if word is ambiguous
        @rtype: True/False.
        previousr2   )r.   r6   s     r   r   z,Disambiguator.get_disambiguated_by_prev_wordy   s=     )--dB77SR  S4  	
r   c                     t           j                            | i                               di                               ||           S )aD  get The disambiguated form of the word by the next.
        The disambiguated form can be fully or partially vocalized.
        @param word: input word.
        @type word: unicode.
        @param next: input next word.
        @type next: unicode.
        @return : if word is ambiguous
        @rtype: True/False.
        nextr2   )r.   w_nexts     r   get_disambiguated_by_next_wordz,Disambiguator.get_disambiguated_by_next_word   s;     )--dB77S__S	
r   c                     t           j                            | i                               di                               d|           S )a  get The disambiguated form of the word by the next word is a verb.
        The disambiguated form can be fully or partially vocalized.
        @param word: input word.
        @type word: unicode.
        @return : if word is ambiguous
        @rtype: True/False.
        verbr1   r2   r-   s    r   r   z,Disambiguator.get_disambiguated_by_next_verb   r4   r   c                 F    dt           j                            | i           v S )ztest if the word can be disambiguated if the next word is a noun
        @param word: input word.
        @type word: unicode.
        @return : if word has an disambiguated.
        @rtype: True/False.
        r0   r2   r-   s    r   r   z+Disambiguator.is_disambiguated_by_next_noun   "     6::4DDDDr   c                 F    dt           j                            | i           v S )ztest if the word can be disambiguated if the next word is a verb
        @param word: input word.
        @type word: unicode.
        @return : if word has an disambiguated.
        @rtype: True/False.
        r<   r2   r-   s    r   r   z+Disambiguator.is_disambiguated_by_next_verb   r>   r   N)__name__
__module____qualname____doc__r   r)   staticmethodr   r   r   r:   r   r   r    r   r   r   r      s        5 5 5
7 7 7r 5 5 \5 
 
 \
 
 
 \
  
 
 \
  
 
 \
 E E \E E E \E E Er   r   c                  ^   d} |                      d          }t                      }|j                            |          }t	          d                    |                     |                    ||          }t	          d                    |                              d                     dS )z
    Main test
    uv      السلام أن العبادي كان أعلن فتح المنطقة أن السلام مفيد أن يركبوا 	utf8N)splitr   r   word_taggingprintjoinr)   encode)textwordlistdisambtaglistr"   s        r   mainlyrS      s     DDzz#H__Fm((22G	$))G

++Hg>>K	#((;


&
&v
.
./////r   )rC   r@   syspathr   r   r   r+   naftawayh.wordtagr	   r   rS   rE   r   r   <module>rW      s     zJJJHOOEHOOL!!! ' & & & & &    _E _E _E _E _E _E _E _ED0 0 0  z
FHHHHH r   