
    -ia1                     Z   d Z ddlmZmZmZmZ ddlZedk    r-ej        	                    d           ddl
mZ ddlmZ ddlmZ nddlmZ ddlmZ dd	lmZ d
Zd Zd Zd Zd Zd ZddZd Zd Zedk    rg dZeD ]Z ee                    d                    Z ee            ej        e          Z e ej         e                    Z! ed"                    e!                      ej         e          Z# ee#          Z$ e%e$e#          Z&e&D ]Z' ee'           dS dS )z
Arabic Named enteties recognation pyarabic.named
@author: Taha Zerrouki
@contact: taha dot zerrouki at gmail dot com
@copyright: Arabtechies,  Arabeyes,   Taha Zerrouki
@license: GPL
@date:2017/02/14
@version: 0.3
    )absolute_importprint_functionunicode_literalsdivisionN__main__z../   )araby)named_const)propernouns)u   شمسu   تقيu   علاءu   نجمu   نورu   سيفc                     | t           j        v S )z
    Test if the word is a proper noun
    @param word: given word
    @type word: unicode
    @return: True if is properword
    @rtype: Boolean
    )r   PROPER_NOUNSwords    L/var/www/html/speakWrite/venv/lib/python3.11/site-packages/pyarabic/named.pyis_proper_nounr   '   s     ;+++    c                    g }d}d}t          |           D ]X\  }}|dz   t          |           k     rt          j        | |dz                      }nd}|dz
  dk    r:t          j        | |dz
                     }|r|dk     r|d         dv r
|dd         }nd}t          j        |          }|}	|r|dk     r|d         dv r
|dd         }	|dk     r	|	dv r|}|}|	d	v r|dk     r|}|}|d
v r|dk     r|dz
  }|}|dv r|dk     r|}|}|dk     rt	          |	          r|}|}|dk    rC|                    d          r|                    d          r|}|                    ||f           d}Z|dk    r|                    ||f           |S )u  
    Detect named enteties words in a text and return positions of each phrase.

    Example:
        >>> detect_named_position(u"قال خالد بن رافع  حدثني أحمد بن عنبر عن خاله")
        ((1,3), (6,8))

    @param wordlist: wordlist
    @type wordlist: unicode list
    @return: list of numbers clause positions [(start,end),(start2,end2),]
    @rtype: list of tuple
    Fr    r   u   وu   فu   لu   بu   كN   ابن
r      بن   أبو   أبا   أبي   عبد   عبيد   بنو   بني   بنت
r   r   r   r   r   r   r   r    r!   r"   r   r"      ال   ي)	enumeratelenr	   strip_tashkeelr   
startswithendswithappend)
wordlist	positions
startnamedendnamedir   nextwordpreviousword_nmkeys
             r   detect_named_positionr6   2   s6    IJHX&& 2 24Q3X+HQqSM::HHhQ3!88+HQqSM::H (JNN{AAA#ABB<H&t,, 	zA~~
!*;
;
;!""+C>>c]22JHH P P PA~~
HH O O OA~~qS
HH...A~~
HH!^^s 3 3^JHHQ%%g.. !73C3CE3J3J ! H  *h!7888JJQ*h/000r   c           	      b   g }t          j        |           }t          |          }|D ]}t          |          dk    rq|d         t          |          k    rX|d         t          |          k    r?|                    d                    ||d         |d         dz                                 |S )uj  
    Extract named enteties words in a text.

    Example:
        >>> extract_named(u"قال خالد بن رافع  حدثني أحمد بن عنبر عن خاله")
        ("خالد بن رافع"، "أحمد بن عنبر ")

    @param text: input text
    @type text: unicode
    @return: named enteties words extracted from text
    @rtype: integer
       r   r    r	   tokenizer6   r(   r,   join)textphrasesr-   r.   poss        r   extract_namedr@   |   s     G~d##H%h//I F Fs88q==1vX&&3q6S]]+B+Btyy#a&#a&(2B)CDDEEENr   c           
         g }t          j        |           }t          |          }|D ]}t          |          dk    r|d         t          |          k    r|d         t          |          k    r|d         dz
  dk    r||d         dz
           }nd}|d         dz   t          |          k     r||d         dz            }nd}|                    |d                    ||d         |d         dz                      |f           |S )uR  
    Extract number words in a text.
    Example:
        >>> extract_named_within_context(u"تصدق عبد الله بن عمر بدينار")
        ("تصدق"، "عبد الله بن عمر"، "بدينار")

    @param text: input text
    @type text: unicode
    @return: number words extracted from text
    @rtype: integer
    r8   r   r   r   r9   r:   )r=   r>   r-   r.   r?   r3   r2   s          r   extract_named_within_contextrB      s    G~d##H%h//I 
E 
Es88q==1vX&&3q6S]]+B+Bq6!8q=='Aq1HH!$hq6!8c(mm++'Aq1HH!$h99Xc!fc!fQh&6788( D E E ENr   c                     t          j        |           } | t          j        v rdS | t          j        v rdS | t          j        v rdS dS )ztGet the word tags
    @param word: given word
    @type word: unicode
    @return: word tag
    @rtype: unicode
    
   منصوب
   مجرور
   مرفوعr   )r	   r)   r
   NOUN_NASEB_LISTJAR_LIST
RAFE3_LISTr   s    r   get_previous_tagrJ      sR     %%D{***}	%	%	%}	'	'	'}sr   r   c                 4   g }|}d}t          |           D ]\  }}t          j        |          }|dk    r|r|dv r|dz  }n|dv r|dz  }n	|dv r|dz  }|dk    r&|d	z  }|d	k    rd|v rd
}nd|v rd}nd|v rd}nd}nd
}n|}|                    |           |S )z Vocalize a number words
    @param wordlist: words to vocalize
    @type wordlist: unicode list
    @param syn_tags: tags about the clause
    @type syn_tags: unicode
    @return: the vocalized wordlist.
    @rtype: unicode
    r   )r   r    u   آلr   rE   )r   rF   )r   rD   r   r   u   بْنِu   بْنُu   بْنَu   بْن)r'   r	   r)   r,   )	r-   syn_tagsnewlisttags	bin_countr1   r   r4   vocs	            r   vocalize_namedrQ      s    G
 DIX&&    4 &t,,66g6EEE%M))%M))%gNIA~~ D((%CC"d**%CC"d**%CC#CC " CsNr   c                    d}g }d}t          |           D ]\  }}t          j        |          }|}|dz   t          |           k     rt          j        | |dz                      }nd}|r|s|d         dv r
|dd         }|r|s|d         dv r
|dd         }|s|dv rd}|                    d	           nP|d
v r2|sd}|                    d	           n1|                    d           n|dv rD|s,d}|                                 |                    d	           n|                    d           n|dv r0|sd}|                    d	           n|                    d           n|s't          |          rd}|                    d	           nu|dk    rX|                    d          r+|                    d          r|                    d           n/|                    d           d}n|                    d           d}|}|S )u  
    Detect named enteties words in a text and return positions of each phrase.

    Example:
        >>> detect_named_position(u"قال خالد بن رافع  حدثني أحمد بن عنبر عن خاله")
        ((1,3), (6,8))

    @param wordlist: wordlist
    @type wordlist: unicode list
    @return: list of numbers clause positions [(start,end),(start2,end2),]
    @rtype: list of tuple

    Fr   r   r   r   Nr   TNBr   NIr#   r$   r%   r&   O)	r'   r	   r)   r(   r,   popr   r*   r+   )	r-   r/   taglistr3   r1   r   r4   r5   r2   s	            r   detect_namedrX      s     JGHX&& 7 74 &t,,Q3X+HQqSM::HHh 	$J 	$HQK;^,^,^|H 	: 	'!*8[*[*[!""+C (	#c]22JNN4     P P P %!
t$$$$t$$$$ O O O %!
t$$$$t$$$$... %!
t$$$$t$$$$ 	#s 3 3 	#JNN4    Q%%g.. '73C3CE3J3J 'NN4((((NN3'''!&JJs###"
Nr   c                    t          |           }d}g }g }d}t          | |          D ]n\  }}|dv r|                    |           |r6t          |          }t	          ||          }|                    |           g }|                    |           |}o|r4t          |          }t	          ||          }|                    |           |S )z
    Detect named words in a text.
    @param wordlist: input text
    @type wordlist: unicode
    @return: wordlist with vocalized named clause
    @rtype: list
    r   )rS   rT   )rX   zipr,   rJ   rQ   extend)	r-   rW   r3   vocalized_listchunkprevious_tagr   tag	vocalizeds	            r   pretashkeel_namedra   E  s     8$$GHNEL7++  	c,LL /99*5,??	%%i000!!$'''HH )'11"5,77	i(((r   )u/   وجد عبد الله بن عمر ديناراut   جاء  خالد بن الوليد وقاتل مسيلمة بن حذام الكذاب في موقعة الحديقةu   روى أحمد بن عقيل الشامي عن أبي طلحة
     المغربي أنّ عقابا بن مسعود بن أبي سعاد قالuX   قال مُحَمَّدُ بْنُ خَالِدُ بْنُ إسماعيلفي حديثهu[   ِنْصَرَفْنَا إِلَى أَنَسُ بْنُ مَالِكَ الْحَديثِr9   )r   )(__doc__
__future__r   r   r   r   sys__name__pathr,   pyarabic.arabyr	   pyarabic.named_constr
   pyarabic.propernounsr   r   	DINENAMEDr   r6   r@   rB   rJ   rQ   rX   ra   TEXTStext1splitpositions_namedprintr)   r;   resultr<   	word_listtag_listrZ   tuplestup r   r   <module>rv      s               


zHOOE"""""".............		, 	, 	,H H HT  0  <  &1 1 1 1fL L LZ  D z  E   //C0@0@AAo$$U++ #">5>%#8#899dii   "EN5))	<	**#h	** 	 	CE#JJJJ	-  r   