
    -i                     t    d Z ddlmZmZmZmZ ddlZ	 ddlZn#  ddl	mZ Y nxY wd Z
d Zd Zd	 Zd
 Zd ZdS )a-  
Normalize
Utility functions used by to prepare an arabic text to search and index.
@author: Taha Zerrouki <taha_zerrouki at gmail dot com>
@author: Taha Zerrouki
@contact: taha dot zerrouki at gmail dot com
@copyright: Arabtechies, Arabeyes, Taha Zerrouki
@license: GPL
@date:2017/02/15
@version:0.3
    )absolute_importprint_functionunicode_literalsdivisionN   )arabyc                 *    t          j        |           S )u  Strip vowel from a text and return a result text.
    The striped marks are :
        - FATHA, DAMMA, KASRA
        - SUKUN
        - SHADDA
        - FATHATAN, DAMMATAN, KASRATAN, , , .
    Example:
        >>> text=u"الْعَرَبِيّةُ"
        >>> strip_tashkeel(text)
        العربية

    @param text: arabic text.
    @type text: unicode.
    @return: return a striped text.
    @rtype: unicode.
    )	arabconststrip_tashkeeltexts    P/var/www/html/speakWrite/venv/lib/python3.11/site-packages/pyarabic/normalize.pyr   r       s    " #D)))    c                 *    t          j        |           S )u   
    Strip tatweel from a text and return a result text.

    Example:
        >>> text=u"العـــــربية"
        >>> strip_tatweel(text)
        العربية

    @param text: arabic text.
    @type text: unicode.
    @return: return a striped text.
    @rtype: unicode.
    )r
   strip_tatweelr   s    r   r   r   6   s     "4(((r   c                     t           j                            t           j        |           } t           j                            t           j        |           S )u"  Normalize Hamza forms into one form, and return a result text.
    The converted letters are :
        - The converted lettersinto HAMZA are: WAW_HAMZA,YEH_HAMZA
        - The converted lettersinto ALEF are: ALEF_MADDA,
        ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW ,HAMZA_ABOVE, HAMZA_BELOW

    Example:
        >>> text=u"أهؤلاء من أولئكُ"
        >>> normalize_hamza(text)
        اهءلاء من اولءكُ

    @param text: arabic text.
    @type text: unicode.
    @return: return a converted text.
    @rtype: unicode.
    )r
   ALEFAT_PATTERNsubALEFHAMZAT_PATTERNHAMZAr   s    r   normalize_hamzar   H   s9    " #''	==D#''	>>>r   c                 *    t          j        |           S )ue  Normalize Lam Alef ligatures into two letters (LAM and ALEF),
    and return a result text.
    Some systems present lamAlef ligature as a single letter,
    this function convert it into two letters,
    The converted letters into  LAM and ALEF are :
        - LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_HAMZA_BELOW,
         LAM_ALEF_MADDA_ABOVE

    Example:
        >>> text=u"لانها لالئ الاسلام"
        >>> normalize_lamalef(text)
        لانها لالئ الاسلام

    @param text: arabic text.
    @type text: unicode.
    @return: return a converted text.
    @rtype: unicode.
    )r
   normalize_ligaturer   s    r   normalize_lamalefr   ]   s    & '---r   c                     t          j        dt          j        z  t          j        |           } t          j        dt          j        z  t          j        |           S )uI  Normalize some spellerrors like,
    TEH_MARBUTA into HEH,ALEF_MAKSURA into YEH, and return
    a result text.
    In some context users omit the difference between TEH_MARBUTA
    and HEH, and ALEF_MAKSURA and YEh.
    The conversions are:
        - TEH_MARBUTA into HEH
        - ALEF_MAKSURA into YEH

    Example:
        >>> text=u"اشترت سلمى دمية وحلوى"
        >>> normalize_spellerrors(text)
        اشترت سلمي دميه وحلوي

    @param text: arabic text.
    @type text: unicode.
    @return: return a converted text.
    @rtype: unicode.
    z[%s])rer   r
   TEH_MARBUTAHEHALEF_MAKSURAYEHr   s    r   normalize_spellerrorsr"   r   s?    ( 6'I119=$GGD6'I22IM4HHHr   c                     t          |           } t          |           } t          |           } t          |           } t	          |           } | S )u:  Normalize input text and return a result text.
    Normalize a text by :
        - strip tashkeel
        - strip tatweel
        - normalize  Hamza
        - normalize Lam Alef.
        - normalize Teh Marbuta and Alef Maksura
    Example:
        >>> text=u'أستشتري دمـــى آلية لأبنائك قبل الإغلاق'
        >>> normalize_searchtext(text)
        استشتري دمي اليه لابناءك قبل الاغلاق

    @param text: arabic text.
    @type text: unicode.
    @return: return a normalized text.
    @rtype: unicode.
    )r   r   r   r   r"   r   s    r   normalize_searchtextr$      sJ    $ $DDT""D4  D &&DKr   )__doc__
__future__r   r   r   r   r   r   r
    r   r   r   r   r"   r$    r   r   <module>r)      s   
 
            
			%%$$$$$$$$* * *,) ) )$? ? ?*. . .*I I I6    s    #