
    -i                     B    d Z ddlZddlmZ d Zd Zd Zd Zd Z	d Z
dS )	a-  
Normalize
Utility functions used by to prepare an arabic text to search and index.
@author: Taha Zerrouki <taha_zerrouki at gmail dot com>
@author: Taha Zerrouki
@contact: taha dot zerrouki at gmail dot com
@copyright: Arabtechies, Arabeyes, Taha Zerrouki
@license: GPL
@date:2017/02/15
@version:0.3
    Nc                 B    t           j                            d|           S )u  Strip vowel from a text and return a result text.
    The striped marks are :
        - FATHA, DAMMA, KASRA
        - SUKUN
        - SHADDA
        - FATHATAN, DAMMATAN, KASRATAN, , , .
    Example:
        >>> text=u"الْعَرَبِيّةُ"
        >>> strip_tashkeel(text)
        العربية

    @param text: arabic text.
    @type text: unicode.
    @return: return a striped text.
    @rtype: unicode.
     )	arabconstHARAKAT_PATsubtexts    R/var/www/html/speakWrite/venv/lib/python3.11/site-packages/tashaphyne/normalize.pystrip_tashkeelr      s    "  $$R...    c                 H    t          j        dt          j        z  d|           S )u   
    Strip tatweel from a text and return a result text.

    Example:
        >>> text=u"العـــــربية"
        >>> strip_tatweel(text)
        العربية

    @param text: arabic text.
    @type text: unicode.
    @return: return a striped text.
    @rtype: unicode.
    [%s]r   )rer   r   TATWEELr   s    r
   strip_tatweelr   ,   s      6'I--r4888r   c                     t           j                            t           j        |           } t           j                            t           j        |           S )u"  Normalize Hamza forms into one form, and return a result text.
    The converted letters are :
        - The converted lettersinto HAMZA are: WAW_HAMZA,YEH_HAMZA
        - The converted lettersinto ALEF are: ALEF_MADDA,
        ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW ,HAMZA_ABOVE, HAMZA_BELOW

    Example:
        >>> text=u"أهؤلاء من أولئكُ"
        >>> normalize_hamza(text)
        اهءلاء من اولءكُ

    @param text: arabic text.
    @type text: unicode.
    @return: return a converted text.
    @rtype: unicode.
    )r   
ALEFAT_PATr   ALEF
HAMZAT_PATHAMZAr   s    r
   normalize_hamzar   >   s9    " ##IND99D##IOT:::r   c                 r    t           j                            t           j        t           j        |           S )ue  Normalize Lam Alef ligatures into two letters (LAM and ALEF),
    and return a result text.
    Some systems present lamAlef ligature as a single letter,
    this function convert it into two letters,
    The converted letters into  LAM and ALEF are :
        - LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_HAMZA_BELOW,
         LAM_ALEF_MADDA_ABOVE

    Example:
        >>> text=u"لانها لالئ الاسلام"
        >>> normalize_lamalef(text)
        لانها لالئ الاسلام

    @param text: arabic text.
    @type text: unicode.
    @return: return a converted text.
    @rtype: unicode.
    )r   LAMALEFAT_PATr   LAMr   r   s    r
   normalize_lamalefr   S   s1    & "&&}}inn-t5 5 5r   c                     t          j        dt          j        z  t          j        |           } t          j        dt          j        z  t          j        |           S )uI  Normalize some spellerrors like,
    TEH_MARBUTA into HEH,ALEF_MAKSURA into YEH, and return
    a result text.
    In some context users omit the difference between TEH_MARBUTA
    and HEH, and ALEF_MAKSURA and YEh.
    The conversions are:
        - TEH_MARBUTA into HEH
        - ALEF_MAKSURA into YEH

    Example:
        >>> text=u"اشترت سلمى دمية وحلوى"
        >>> normalize_spellerrors(text)
        اشترت سلمي دميه وحلوي

    @param text: arabic text.
    @type text: unicode.
    @return: return a converted text.
    @rtype: unicode.
    r   )r   r   r   TEH_MARBUTAHEHALEF_MAKSURAYEHr   s    r
   normalize_spellerrorsr!   j   s?    ( 6'I119=$GGD6'I22IM4HHHr   c                     t          |           } t          |           } t          |           } t          |           } t	          |           } | S )u:  Normalize input text and return a result text.
    Normalize a text by :
        - strip tashkeel
        - strip tatweel
        - normalize  Hamza
        - normalize Lam Alef.
        - normalize Teh Marbuta and Alef Maksura
    Example:
        >>> text=u'أستشتري دمـــى آلية لأبنائك قبل الإغلاق'
        >>> normalize_searchtext(text)
        استشتري دمي اليه لابناءك قبل الاغلاق

    @param text: arabic text.
    @type text: unicode.
    @return: return a normalized text.
    @rtype: unicode.
    )r   r   r   r   r!   r   s    r
   normalize_searchtextr#      sJ    $ $DDT""D4  D &&DKr   )__doc__r   tashaphyne.arabic_constarabic_constr   r   r   r   r   r!   r#    r   r
   <module>r(      s   
 
 
			 + + + + + +/ / /,9 9 9$; ; ;*5 5 5.I I I6    r   