§
    ‡-”iÅ  ã                   óB   — d Z ddlZddlmZ d„ Zd„ Zd„ Zd„ Zd„ Z	d„ Z
dS )	a-  
Normalize
Utility functions used by to prepare an arabic text to search and index.
@author: Taha Zerrouki <taha_zerrouki at gmail dot com>
@author: Taha Zerrouki
@contact: taha dot zerrouki at gmail dot com
@copyright: Arabtechies, Arabeyes, Taha Zerrouki
@license: GPL
@date:2017/02/15
@version:0.3
é    Nc                 óB   — t           j                             d| ¦  «        S )u¤  Strip vowel from a text and return a result text.
    The striped marks are :
        - FATHA, DAMMA, KASRA
        - SUKUN
        - SHADDA
        - FATHATAN, DAMMATAN, KASRATAN, , , .
    Example:
        >>> text=u"Ø§Ù„Ù’Ø¹ÙŽØ±ÙŽØ¨ÙÙŠÙ‘Ø©Ù"
        >>> strip_tashkeel(text)
        Ø§Ù„Ø¹Ø±Ø¨ÙŠØ©

    @param text: arabic text.
    @type text: unicode.
    @return: return a striped text.
    @rtype: unicode.
    Ú )Ú	arabconstÚHARAKAT_PATÚsub©Útexts    úR/var/www/html/speakWrite/venv/lib/python3.11/site-packages/tashaphyne/normalize.pyÚstrip_tashkeelr      s   € õ" Ô ×$Ò$ R¨Ñ.Ô.Ð.ó    c                 óH   — t          j        dt          j        z  d| ¦  «        S )u   
    Strip tatweel from a text and return a result text.

    Example:
        >>> text=u"Ø§Ù„Ø¹Ù€Ù€Ù€Ù€Ù€Ø±Ø¨ÙŠØ©"
        >>> strip_tatweel(text)
        Ø§Ù„Ø¹Ø±Ø¨ÙŠØ©

    @param text: arabic text.
    @type text: unicode.
    @return: return a striped text.
    @rtype: unicode.
    ú[%s]r   )Úrer   r   ÚTATWEELr   s    r
   Ústrip_tatweelr   ,   s    € õ Œ6'IÔ-Ñ-¨r°4Ñ8Ô8Ð8r   c                 óª   — t           j                             t           j        | ¦  «        } t           j                             t           j        | ¦  «        S )u"  Normalize Hamza forms into one form, and return a result text.
    The converted letters are :
        - The converted lettersinto HAMZA are: WAW_HAMZA,YEH_HAMZA
        - The converted lettersinto ALEF are: ALEF_MADDA,
        ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW ,HAMZA_ABOVE, HAMZA_BELOW

    Example:
        >>> text=u"Ø£Ù‡Ø¤Ù„Ø§Ø¡ Ù…Ù† Ø£ÙˆÙ„Ø¦ÙƒÙ"
        >>> normalize_hamza(text)
        Ø§Ù‡Ø¡Ù„Ø§Ø¡ Ù…Ù† Ø§ÙˆÙ„Ø¡ÙƒÙ

    @param text: arabic text.
    @type text: unicode.
    @return: return a converted text.
    @rtype: unicode.
    )r   Ú
ALEFAT_PATr   ÚALEFÚ
HAMZAT_PATÚHAMZAr   s    r
   Únormalize_hamzar   >   s9   € õ" Ô×#Ò#¥I¤N°DÑ9Ô9€DÝÔ×#Ò#¥I¤O°TÑ:Ô:Ð:r   c                 ór   — t           j                             t           j        ›t           j        ›| ¦  «        S )ue  Normalize Lam Alef ligatures into two letters (LAM and ALEF),
    and return a result text.
    Some systems present lamAlef ligature as a single letter,
    this function convert it into two letters,
    The converted letters into  LAM and ALEF are :
        - LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_HAMZA_BELOW,
         LAM_ALEF_MADDA_ABOVE

    Example:
        >>> text=u"Ù„Ø§Ù†Ù‡Ø§ Ù„Ø§Ù„Ø¦ Ø§Ù„Ø§Ø³Ù„Ø§Ù…"
        >>> normalize_lamalef(text)
        Ù„Ø§Ù†Ù‡Ø§ Ù„Ø§Ù„Ø¦ Ø§Ù„Ø§Ø³Ù„Ø§Ù…

    @param text: arabic text.
    @type text: unicode.
    @return: return a converted text.
    @rtype: unicode.
    )r   ÚLAMALEFAT_PATr   ÚLAMr   r   s    r
   Únormalize_lamalefr   S   s1   € õ& Ô"×&Ò&ÝŒ}ˆ}iœn˜nÐ-¨tñ5ô 5ð 5r   c                 ó¶   — t          j        dt          j        z  t          j        | ¦  «        } t          j        dt          j        z  t          j        | ¦  «        S )uI  Normalize some spellerrors like,
    TEH_MARBUTA into HEH,ALEF_MAKSURA into YEH, and return
    a result text.
    In some context users omit the difference between TEH_MARBUTA
    and HEH, and ALEF_MAKSURA and YEh.
    The conversions are:
        - TEH_MARBUTA into HEH
        - ALEF_MAKSURA into YEH

    Example:
        >>> text=u"Ø§Ø´ØªØ±Øª Ø³Ù„Ù…Ù‰ Ø¯Ù…ÙŠØ© ÙˆØ­Ù„ÙˆÙ‰"
        >>> normalize_spellerrors(text)
        Ø§Ø´ØªØ±Øª Ø³Ù„Ù…ÙŠ Ø¯Ù…ÙŠÙ‡ ÙˆØ­Ù„ÙˆÙŠ

    @param text: arabic text.
    @type text: unicode.
    @return: return a converted text.
    @rtype: unicode.
    r   )r   r   r   ÚTEH_MARBUTAÚHEHÚALEF_MAKSURAÚYEHr   s    r
   Únormalize_spellerrorsr!   j   s?   € õ( Œ6'IÔ1Ñ1µ9´=À$ÑGÔG€DÝŒ6'IÔ2Ñ2µI´MÀ4ÑHÔHÐHr   c                 óœ   — t          | ¦  «        } t          | ¦  «        } t          | ¦  «        } t          | ¦  «        } t	          | ¦  «        } | S )u:  Normalize input text and return a result text.
    Normalize a text by :
        - strip tashkeel
        - strip tatweel
        - normalize  Hamza
        - normalize Lam Alef.
        - normalize Teh Marbuta and Alef Maksura
    Example:
        >>> text=u'Ø£Ø³ØªØ´ØªØ±ÙŠ Ø¯Ù…Ù€Ù€Ù€Ù‰ Ø¢Ù„ÙŠØ© Ù„Ø£Ø¨Ù†Ø§Ø¦Ùƒ Ù‚Ø¨Ù„ Ø§Ù„Ø¥ØºÙ„Ø§Ù‚'
        >>> normalize_searchtext(text)
        Ø§Ø³ØªØ´ØªØ±ÙŠ Ø¯Ù…ÙŠ Ø§Ù„ÙŠÙ‡ Ù„Ø§Ø¨Ù†Ø§Ø¡Ùƒ Ù‚Ø¨Ù„ Ø§Ù„Ø§ØºÙ„Ø§Ù‚

    @param text: arabic text.
    @type text: unicode.
    @return: return a normalized text.
    @rtype: unicode.
    )r   r   r   r   r!   r   s    r
   Únormalize_searchtextr#   …   sJ   € õ$ ˜$ÑÔ€DÝ˜ÑÔ€DÝ˜TÑ"Ô"€DÝ˜4Ñ Ô €DÝ  Ñ&Ô&€DØ€Kr   )Ú__doc__r   Útashaphyne.arabic_constÚarabic_constr   r   r   r   r   r!   r#   © r   r
   ú<module>r(      s“   ðð
ð 
ð 
€	€	€	Ø +Ð +Ð +Ð +Ð +Ð +ð/ð /ð /ð,9ð 9ð 9ð$;ð ;ð ;ð*5ð 5ð 5ð.Ið Ið Ið6ð ð ð ð r   