
    -i                         d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlZddl	Z	 G d d          Z
d	 Zed
k    r e             dS dS )z
Syntaxic Analysis
    N   )analex)stemnode)stemmedwordc                   d    e Zd ZdZddZd Zd ZddZd Zd	 Z	d
 Z
ddZd Zd ZddZddZdS )
Lemmatizerz
    Arabic Lemmatizer
    Fc                 F    t          j        |          | _        d| _        dS ) FN)r   Analexanalexervocalized_lemma)self
cache_paths     P/var/www/html/speakWrite/venv/lib/python3.11/site-packages/qalsadi/lemmatizer.py__init__zLemmatizer.__init__#   s#     j11$    c                     d S )N r   s    r   __del__zLemmatizer.__del__)   s    r   c                 r    g }g }|D ]/}|                     t          j        || j                             0|S )a1  
        lemmatization  of stemming results.
        morphological Result is a list of stemmedword objects
        @param detailed_stemming_dict: detailed stemming dict.
        @type detailed_stemming_dict:list of list of stemmedword objects
        @return: lemmas.
        @rtype: list lemmas.
        )appendr   StemNoder   )r   detailed_stemming_dictstemmedsynwordlistliststemnode_liststemming_lists        r   analyzezLemmatizer.analyze,   sQ     "$ 4 	Y 	YM   !2=$BV!W!WXXXXr    c                     g }|D ]V}|r(|                     |                                           ,|                     |                    ||                     W|S )z9
        Generate all lemmas from stemnode_list

        )pos
return_pos)r   
get_lemmas	get_lemma)r   r   r!   r"   alllemmasstnds          r   r#   zLemmatizer.get_lemmasA   sn    
 ! 	N 	ND Ndoo//0000dnnnLLMMMM r   c                 \    t          j        t          j        |          }d |D             }|S )a/  
        Decode objects result from analysis. helps to display result.
        @param stemmed_synwordlistlist: list of  list of StemmedSynWord.
        @type word_result: list of  list of StemmedSynWord
        @return: the list of list of dict to display.
        @rtype: list of  list of dict
        c                     g | ]	}|j         
S r   )__dict__).0xs     r   
<listcomp>z%Lemmatizer.decode.<locals>.<listcomp>Z   s    333AQZ333r   )	functoolsreduceoperatorconcatr   stemmed_synwordlistlist	flat_lists      r   decodezLemmatizer.decodeQ   s2     $X_6MNN	33333	r   c           	          d}|D ]X}|dz  }|D ]I}|dz  }|j         }t          |                                          D ]}|d|d||         dz  }|dz  }J|dz  }Y|d	z  }|S )
z
        display objects result from analysis
        @param stemmed_synwordlistlist: list of  list of StemmedSynWord.
        @type word_result: list of  list of StemmedSynWord
        [z
	[z
		{z
		u'z' = u'z',z
		}z
	]z
])r*   sortedkeys)r   r3   textrlistitemstmwordkeys          r   displayzLemmatizer.display]   s     , 	 	EGOD " "	!-!',,..11 I ICDccc73<<<HHDD	!GODDr   c                 X    |                      |          }t          j        |           dS )z
        print objects result from analysis
        @param stemmed_synwordlistlist: list of  list of StemmedSynWord.
        @type word_result: list of  list of StemmedSynWord
        N)r5   pprintr2   s      r   rA   zLemmatizer.pprintp   s,     KK 788	i     r   c                 d    | j                             |          }|                     |          }|S )zg
        Text Analysis syntacticly
        @param text: input text
        @type text: unicode
        )r   
check_textr   )r   r:   r   resultstemnodelists        r   analyze_textzLemmatizer.analyze_texty   s0     ))$//||F++r   c                     d| _         dS )k
        set output lemma as vocalized
        @param text: input text
        @type text: unicode
        TNr   r   s    r   set_vocalized_lemmazLemmatizer.set_vocalized_lemma   s      $r   c                     d| _         dS )rH   FNrI   r   s    r   unset_vocalized_lemmaz Lemmatizer.unset_vocalized_lemma   s      %r   c                     | j                             |          }|                     |          }|                     ||||          }|S )\
        Lemmatize text
        @param text: input text
        @type text: unicode
        r"   r!   r%   )r   rC   r   r#   )r   r:   r"   r!   r%   rD   rE   r&   s           r   lemmatize_textzLemmatizer.lemmatize_text   sI     ))$//||F++*#SVWWr   c                 T    |                      ||||          }|r|d         S |rdS dS )rN   rO   r   r   r   )rP   )r   wordr"   r!   r%   lemmasListOfLists         r   	lemmatizezLemmatizer.lemmatize   sJ      ..t
PSY\.]] 
	#A&&  rrr   N)F)r   FF)Fr   F)__name__
__module____qualname____doc__r   r   r   r#   r5   r?   rA   rF   rJ   rL   rP   rT   r   r   r   r   r      s         % % % %    *    
 
 
  &! ! !   $ $ $% % %
 
 
 
     r   r   c                      d} g }t                      }|                    |           }|                    |           }t          j        |           dS )z
    main test
    u   إلى البيتN)r   rF   morphrA   )r:   rD   lemmers      r   mainlyr\      sQ    
 DF\\F  &&F\\$F
M&r   __main__)rX   rA   pyarabic.arabyarabyr   r   r   r   r.   r0   r   r\   rU   r   r   r   <module>r`      s                                  W W W W W W W Wt   z
FHHHHH r   