
    -i2q                        d Z ddlmZmZmZmZ ddlZddlZddlm	Z
 	 ddlZddlZddlZddlZn## e$ r ddlmZ ddlmZ ddlmZ dd	lmZ Y nw xY w G d
 de          Zd Zd Zd#dZd Zd Zd Zd Zd Zd Zd Zd Zd Z d$dZ!e"dk    r.g dZ#e#D ](Z$ ej%        e$          Z& ee&          Z' e(e'            ee$          Z) ee&          Z* e(e*            e(d+                    e&                      e( e,e*e&                      e(d           d+                     e e&                    Z-ej.        dk     r e(e-/                    d                      n e(e-            e(d!+                    e)                      e(d"            e( ej0         ee$                               (dS dS )%a  
Arabic numbers routins
@author: Taha Zerrouki
@contact: taha dot zerrouki at gmail dot com
@copyright: Arabtechies, Arabeyes, Taha Zerrouki
@license: GPL
@date:2017/02/14
@version: 0.3
# ArNumbers is imported from
license:   LGPL <http://www.gnu.org/licenses/lgpl.txt>
link      http://www.ar-php.org
category  Text
author    Khaled Al-Shamaa <khaled.alshamaa@gmail.com>
copyright 2009 Khaled Al-Shamaa
    )absolute_importprint_functionunicode_literalsdivisionN)	text_type   )araby)number_const)named_const)arabreprc                   P    e Zd ZdZi ZdZdZd Zd Zd Z	d Z
d Zdd	Zd
 Zd ZdS )	ArNumbersz
    Arabic number class
    r   c                 J    t           j        | _        t           j        | _        d S )N)nbconstINDIVIDUALS_individualCOMPLICATIONScomplicationsselfs    M/var/www/html/speakWrite/venv/lib/python3.11/site-packages/pyarabic/number.py__init__zArNumbers.__init__4   s    ".$2    c                 &    d}|dv r|| _         nd}|S )z
         Set feminine flag of the counted object
         @param value: value Counted object feminine (1 for masculine & 2 for feminine)
         @type value: integer
         @return: True if success, or False if fail
         @rtype: boolean
         Tr      F	_femininer   valueflags      r   set_femininezArNumbers.set_feminine8   s%     F??"DNNDr   c                 &    d}|dv r|| _         nd}|S )a  
        Set the grammar position flag of the counted object
        @param value: Grammar position of counted object (1 if Marfoua & 2 if Mansoub or Majrour)
        @type value: integer
        @return: True if success, or False if fail
        @rtype: boolean
        Tr   F_formatr   s      r   
set_formatzArNumbers.set_formatH   s%     F?? DLLDr   c                     | j         S )z
        Get the feminine flag of counted object
        @return: return current setting of counted object feminine flag
        @rtype: integer
        r   r   s    r   get_femininezArNumbers.get_feminineY   s     ~r   c                     | j         S )z
        Get the grammer position flag of counted object
        @return: return current setting of counted object grammer position flag
        @rtype: integer
        r$   r   s    r   
get_formatzArNumbers.get_formata   s     |r   Nc                 0   |                     d          }|                     |d                   }t          |          dk    r#|                     |d                   }|d|z   z  }|r-||                                }|                    |d|          }|S )a  
         Spell integer number in Arabic idiom
         @param number: The number you want to spell in Arabic idiom
         @type number: integer
         @param  output_charset: (optional) Output charset [utf-8|windows-1256|iso-8859-6]
         default value is None (use set output charset)
         @type  output_charset: string
         @param main:  Main Ar-PHP object to access charset converter options
         @type main: object
         @return: The Arabic idiom that spells inserted number
         @rtype: string
         .r   r   u    فاصلة Nzutf-8)split_int2strlengetOutputCharsetcoreConvert)r   numberoutput_charsetmaintempstringdecs          r   int2strzArNumbers.int2strj   s     ||C  tAw''t99q==--Q((Co++F 	G%!%!6!6!8!8%%fg~FFFr   c                    g }g }d}	 t          |          }n# t          $ r d}Y nw xY wt          |          dk    rd}t          |          dk    rJ|                    |dd                    |dt          |          dz
           }t          |          dk    J|                    |           t          |          dz
  }|}|dk    rt	          j        t          ||                             }|                     |          }	|	r|dk    r|dk    r| j        |         d         }	np|dk    r|dk    r| j        |         | j                 }	nK|dk    r&|d	k     r |dk    r|	d
| j        |         d         z   z  }	n|dk    r|	d
| j        |         d         z   z  }	|                    |	           |dz  }|dk    d	                    |          }nd}|S )z
        Spell integer number in Arabic idiom
        @param number_str: The number you want to spell in Arabic idiom
        @type number_str: integer.
        @return: The Arabic idiom that spells inserted number
        @rtype:string
         r      Nr      r           و    صفر)
int
ValueErrorr/   appendmathfloor_written_blockr   r%   join)
r   
number_strblocksitemsr6   r2   number_part
blocks_numitexts
             r   r.   zArNumbers._int2str   s    	__FF 	 	 	FFF	v;;??Kj//A%%jo...'(<Z1)<(<=
 j//A%% MM*%%%VqJAq&&Cq	NN33**622 	'{{qAvv#1!4Q71a#1!4T\B!Qt'9!'<Q'? ??at'9!'<Q'? ??LL&&&Q q&& \\%((FFFs    ''c                 
   g }d}t          |          }|dk    rqt          j        |dz            dz  }|dz  }|dk    r,|                    | j        |         | j                            n |                    | j        |                    |dk    s|dk    r8|                    | j        |         | j                 | j                            n|dk     r9|                    | j        t          |                   | j                            n|dz  }t          j        |dz            dz  }t          |          }|dk    r7|                    | j        |         | j                 | j                            n1|d	k    r+|                    | j        |         | j                            |                    | j        |         | j                            d|v r|                    d           d
                    |          }|S )a'  
        Spell sub block number of three digits max in Arabic idiom
        @param number: number Sub block number of three digits max you want to spell in Arabic idiom
        @type number: integer
        @return: The Arabic idiom that spells inserted sub block
        @rtype: String
        r:   c   d      r         
   r   r@   )	rB   rE   rF   rD   r   r%   r   removerH   )r   r2   rK   r6   hundredonestenss          r   rG   zArNumbers._written_block   s    VB;;j#..4Gc\F#~~T-g6t|DEEEET-g6777Q;;&B,,LL)&1$.A$,OPPPPb[[LL)#f++6t~FGGGGB;D:frk**R/Dt99Dqyy$T*4>:4<HJ J J JT-d3DNCDDDLL)$/=>>>%<<LLe$$r   )NN)__name__
__module____qualname____doc__r   r   r%   r   r"   r&   r(   r*   r8   r.   rG    r   r   r   r   )   s          KIG
3 3 3     "       0* * *X& & & & &r   r   c                    d}d}t          j        |           } |                     d          }|D ]}|r|dk    r|d         dv r
|dd         }|dk    r|                    d          r
|dd         }|t          j        v rCt          j        |         }|dz  dk    r|dk    rd}|||z  z  }d}|t          j        |         z  }||z  }|S )	u'  
    Convert arabic text into number, for example convert تسعة وعشرون = >29.

    Example:
        >>> text2number(u"خمسمئة وثلاث وعشرون")
        523

    @param text: input text
    @type text: unicode
    @return: number extracted from text
    @rtype: integer
    r   r?      واحد   و   ف   ل   ب   كr   Nrc     )r	   strip_tashkeelr-   
startswithr   NUMBER_WORDS)rO   totalpartialwordswordactualnumbers         r   text2numberrq      s    EG%%DJJtE 6 6 	DK''DG7Z,Z,Z8D;4??5#9#98D7'''"/5Ld"a''a<<G<//7/55	WELr   c                 `   t          |           t          u st          |           t          u rt          |           } nPt          |           t          u st          |           t          u r"	 t          |           }n# t
          $ r Y dS w xY wdS t                      }|                    |           S )u  
    Convert number to arabic words, for example convert 25 --> خمسة و عشرون

    Example:
        >>> number2text(523)
        خمسمئة وثلاث وعشرون

    @param anumber: input number
    @type anumber: int
    @return: number words
    @rtype: unicode
    rA   )	typerB   floatstrunicoderC   r   r8   rl   )anumberaarbns      r   number2textrz     s     G}}tG}}55g,,	g#		g'!9!9	gAA 	 	 	99	 y;;D<<   s   *A: :
BBr:   c                 *   g }d}d}|}t          |           dk    r| d         }t          j        |          }|}|}	|r,| s*|dk    r$|d         dv r|d         dv r|dz  }|dd         }n%|dk    r|                    d	          r
|dd         }|t          j        v r|d
vr|t          j        |         d         z   }	|	gS t          |           D ]\  }
}t          j        |          }|}|
dk    r,|r*|dk    r$|d         dv r|d         dv r|dz  }|dd         }n%|dk    r|                    d	          r
|dd         }|t          j        v rJ|                    d          r|dz  }|                    d          s|                    d          r|dz  }d}t          |           D ]g\  }
}|
dz   t          |           k     r| |
dz            }nd}|}|r6|dk    r0|d         dv r&|dd         }|d         }|dv r|dz  }n|dv r|dz  }nd}|t          j        v rd}	t          j        |         d         dk    r|t          j        |         d         z   }	n|dk    s|dk    r|t          j        |         d         z   }	n^|dk    r|t          j	        v rd}	nF|dk    r|t          j
        v rd}	n.d|v rM|                    d	          r|t          j        |         d         z   }	n|t          j        |         d         z   }	nd|v r|t          j        |         d         z   }	nd|v rM|                    d	          r|t          j        |         d         z   }	n|t          j        |         d         z   }	nld |v rM|                    d	          r|t          j        |         d!         z   }	n7|t          j        |         d         z   }	n|t          j        |         d         z   }	|                    |	           n|                    ||z              |}i|S )"u   Vocalize a number words clause

    Example:
        >>> txt = u"خمسمئة وثلاثة وعشرين"
        >>> wordlist = araby.tokenize(txt)
        >>> vocalized = vocalize_number(wordlist)
        >>> print u" ".join(vocalized)
        خَمْسمِئَة وَثَلاثَة وَعِشْرِينَ

    @param wordlist: words to vocalize
    @type wordlist: unicode list
    @param syn_tags: tags about the clause
    @type syn_tags: unicode
    @return: the vocalized wordlist.
    @rtype: unicode
    r:   r   r   ra   rb   )re   rf   rg   
   مجرورNrc   )   عشرu   خمسu   سبعu   تسعu   خمساu   سبعاu   تسعاu   عشراu
   ألفين   عشرةrA   u   ألفrN   u   ينu
   مجهولu   انu   ون
   مرفوع)rc   rd   rg   u   َ)re   rf   u   ِs*r}   r~   nu   عَشَرَu   عَشْرَةَr2rj2j
   منصوبn2)r/   r	   ri   rj   r   rk   VOCALIZED_NUMBER_WORDS	enumerateendswithNUMBER_TEN_MASCULIN_UNITSNUMBER_TEN_FEMININ_UNITSrD   )wordlistsyn_tagsnewlistprefixnextwordtagsro   word_nmkeyvocrN   pre_keys               r   vocalize_numberr   !  s   " GFHD
8}}{&t,, 	8 	;(>(>GBBBqz222%qrr(CC##(:(:5(A(A#!""+C'&&& u u u79#>sCCCwX&& & &4 &t,,66g6'["8"8Q>>>qz222%qrr(CC##(:(:5(A(A#!""+C'&&&(( &%!!'** &g.>.>w.G.G &% GX&& 8 84q53x==  AHHH 		DK''Q>>>qrr(C!WF...%>))%F'000C-c2373>>w=cB3GG Y&&(k*A*Aw=cB3GG	!!g1R&R&R%##73S(S(S)$&&&&u-- L 7#A##Ft#LLCC 7#A##Fs#KKCC$&&w=cB3GG$&&&&u-- L 7#A##Ft#LLCC 7#A##Fs#KKCC$&&&&u-- C 7#A##Ft#LLCC !8=cBCCC w=cB3GGNN3NN6C<(((Nr   c                     | t           j        v S )z
    return if the given word is a unit
    @param word: given word to be tested
    @type word: unicode
    @return: if word is a unit return True else False.
    @rtype: Boolean
    )r   
UNIT_WORDSro   s    r   is_unitr     s     7%%%r   c                    t          j        |          }t          |          s|S d}|}| dk    r| dk    r|S | dz  dk    s	| dz  dk    rd}t          j        |         d         }nh| dz  dk    r|d	z  }t          j        |         d
         }nA| dz  dk     r|dz  }t          j        |         d         }nd}t          j        |         d         }|sd|z   S |S )z Vocalize a number words
    @param numeric: given number
    @type numeric: integer
    @param unit: unit to vocalize
    @type unit: unicode
    @return: the vocalized unit, or unit word if itsnt a unit word.
    @rtype: unicode
    r:   r   r   rR   rh   SingleMajrourrx   rV   PluralpSingleMansoubr   rN   Error)r	   ri   r   r   r   )numericunitunit_nmr   vocalizedunits        r   vocalize_unitr     s    "4((G7 DM
 !||1
 
3!		w~22*73C8 
3"		*73C8	3		*73C8*73C8 ~r   c                     t          j        |           } | t          j        v rdS | t          j        v rdS | t          j        v rdS dS )zsGet the word tags
    @param word: given word
    @type word: unicode
    @return:word tag
    @rtype: unicode
    r   r|   r   r:   )r	   ri   nmconstNOUN_NASEB_LISTJAR_LIST
RAFE3_LISTr   s    r   get_previous_tagr     sR     %%Dw&&&}	!	!	!}	#	#	#}sr   c           	      b   g }t          j        |           }t          |          }|D ]}t          |          dk    rq|d         t          |          k    rX|d         t          |          k    r?|                    d                    ||d         |d         dz                                 |S )u  
    Extract number words in a text.

    Example:
        >>> extract_number_phrases(u"وجدت خمسمئة وثلاثة وعشرين دينارا فاشتريت ثلاثة عشر دفترا")
        خمسمئة وثلاثة وعشرين
        ثلاثة عشر

    @param text: input text
    @type text: unicode
    @return: number words extracted from text
    @rtype: integer
    r   r   r   r?   r	   tokenizedetect_number_phrases_positionr/   rD   rH   )rO   phrasesr   	positionsposs        r   extract_number_phrasesr     s     G~d##H.x88I H Hs88q==1vX&&3q6S]]+B+Btyy#a&#a&1*2D)EFFGGGNr   c           
         g }t          j        |           }t          |          }|D ]}t          |          dk    r|d         t          |          k    r|d         t          |          k    r|d         dz
  dk    r||d         dz
           }nd}|d         dz   t          |          k     r||d         dz            }nd}|                    |d                    ||d         |d         dz                      |f           |S )u  
    Extract number words in a text within context.

    Example:
        >>> extract_number_context(u"وجدت خمسمئة وثلاثة وعشرين دينارا فاشتريت ثلاثة عشر دفترا")
        ‎وجدت، خمسمئة وثلاثة وعشرين، دينارا
        ‎فاشتريت، ثلاثة عشر ، دفتر

    @param text: input text
    @type text: unicode
    @return: number words extracted from text
    @rtype: integer
    r   r   r   r:   r?   r   )rO   r   r   r   r   prevr   s          r   extract_number_contextr     s    G~d##H.x88I N Ns88q==1vX&&3q6S]]+B+Bq6A:??#CFQJ/DDDq6A:H--'A
3HH"H499Xc!fSVaZ.?%@AA8LN N NNr   c                 >   g }d}d}t          |           D ]\  }}|dz   t          |           k     rt          j        | |dz                      }nd}t          j        |          }|}|r|s|dk    r|d         dv r|dd         }n%|dk    r|                    d          r
|dd         }|t
          j        v s|                                r|d	vs|d
v r
|dk     r|}|}|dk    r|                    ||f           d}|dk    r|                    ||f           |S )uQ  
    Detect number words in a text and return positions of each phrase.

    Example:
        >>> txt = u"وجدت خمسمئة وثلاثة وعشرين دينارا فاشتريت ثلاثة عشر دفترا"
        >>> wordlist = araby.tokenize(txt)
        >>> positions_phrases = detect_number_phrases_position(wordlist)
        >>> print positions_phrase
        >>> print positions_phrases
        [(1, 3), (6, 7)]

    @param wordlist: wordlist
    @type wordlist: unicode list
    @return: list of numbers clause positions [(start,end),(start2,end2),]
    @rtype: list of tuple
    Fr   Nra   r   rb   rc   u   أحدu   إحدىu   اثناu   اثنيu
   اثنتيu
   اثنتاr}   r~   	r   r/   r	   ri   rj   r   rk   	isnumericrD   )	r   r   startnumber	endnumberrN   ro   r   r   r   s	            r   r   r   $  sy   & GKIX&&  4q53x==  +HQUO<<HHH&t,, 	; 	7k+A+AAJ"EEE!""+CC##(:(:5(A(A#!""+C'&&&#--//& * * *-59Q-Q-Q??"#K	 aY7888KKaY/000Nr   c                 p   d}g }t          |           D ] \  }}|dz   t          |           k     rt          j        | |dz                      }nd}t          j        |          }|}|r|s|dk    r|d         dv r|dd         }n%|dk    r|                    d          r
|dd         }|t
          j        v s|                                rQ|dvs|d	v r0|s|                    d
           d}|                    d           d}|                    d           	d}|                    d           "|S )u  
    Detect number words in a text and return a taglist as BIO.

    Example:
        >>> wordlist = araby.tokenize(u"وجدت خمسمئة وثلاثة وعشرين دينارا فاشتريت ثلاثة عشر دفترا")
        >>> detect_numbers(wordlist)
        ['DO', 'DB', 'DI', 'DI', 'DO', 'DO', 'DB', 'DI', 'DO']

    @param wordlist: wordlist
    @type wordlist: unicode list
    @return: list of tags BIO
    @rtype: list of unicode
    Fr   Nra   r   rb   rc   r   r   DBTDIOr   )r   startstaglistrN   ro   r   r   r   s           r   detect_numbersr   \  s}    FGX&&    4q53x==  +HQUO<<HHH&t,, 	6 	g&<&<AJ"EEE!""+CC##(:(:5(A(A#!""+C'&&&#--//& * * *-59Q-Q-Q )NN4(((!FFNN4((((s####FNN3Nr   c           
      2   t          |           }|D ]}t          |          dk    rl|d         }|d         }|d         }|}t          |          }t          |          }t	          j        |                              d          }	t          |	|          }
t	          j        ||
          }t          ||          }t	          j        ||          }|dk     rt          d                    t          |          d                    |          d                    |
          g                     t          t          |          d                    |||g                     t          d                    ||t          |          g                     dS )u=  
    Detect number words in a text.

    Example:
        >>> detect_number_words(u"وجدت خمسمئة وثلاثة وعشرين دينارا")
        خمسمئة وثلاثة وعشرين

    @param text: input text
    @type text: unicode
    @return: number words extracted from text
    @rtype: integer
    r;   r   r   r   r?   	N)r   r/   rq   r   r	   ri   r-   r   vocalized_similarityr   printrH   ru   )rO   phrases_contextph_conpreviousphraser   numberedwordsr   r   r   	vocalizedsimvoc_unitsim_units                 r   detect_number_wordsr     su    -T22O! G Gv;;!ayHAYFayH"M!-00G#H--D+M::@@EEH'$77I,]IFFC$Wh77H1(HEEHQww ejj#c((DIIm,D,DdiiPYFZFZ![\\]]]c'llDIIx.J$K$KLLLejj(Hc(mm!DEEFFF+G Gr   c                    t          |           }d}g }g }d}t          | |          D ]n\  }}|dv r|                    |           |r6t          |          }t	          ||          }|                    |           g }|                    |           |}o|r't	          ||          }|                    |           g }|S )uW  
    Vocalized a number clauses in a text.

    Example:
        >>> txt = u"وجدت خمسمئة وثلاثة وعشرين دينارا فاشتريت ثلاثة عشر دفترا"
        >>> wordlist = araby.tokenize(txt)
        >>> vocalized = pre_tashkeel_number(wordlist)
        >>> print u" ".join(vocalized)
        وجدت خَمْسمِئَة وَثَلاثَة وَعِشْرِينَ دينارا فاشتريت ثَلاثَةَ عَشَرَ دفترا

    @param wordlist: input text
    @type wordlist: unicode
    @return: wordlist with vocalized number clause
    @rtype: list
  r:   )r   r   )r   ziprD   r   r   extend)	r   r   r   vocalized_listchunkprevious_tagro   tagr   s	            r   pre_tashkeel_numberr     s      X&&GHNEL7++  	c,LL /99+E<@@	%%i000!!$'''HH #E<88	i(((r   Fc                 <   d}t          |           t          u rt          |           } nPt          |           t          u st          |           t          u r"	 t          |           }n# t          $ r Y dS w xY wdS |dk    r|rdS dS t                      }|                    |           }|                    dd          }|                    d	          }g }|rt          j
        }nt          j        }|r|d         }||v r|                    ||          }d
|z   }|                    |           |dd         D ]_}|                    t          j                  r)|dd         }||v r|                    ||          }d|z   }|                    |           `|dd         dk    r*|                                 |                    d           n7|dd         dk    r)|                                 |                    d           d	                    |          }	|	S )uB  
    Convert number to arabic words in ordinal form, for example convert 25 --> الخامس والعشرون
    
      Example:
        >>> number2text(523)
        الخمسمئة والثالث والعشرون
    @param anumber: input number
    @type anumber: int
    @return: number words
    @rtype: unicode
    r   rA   u
   الصفرr   u   الأولىu
   الأولu   و rc   r?   u   الNu   والr   u   الحاديu   الواحدu   الحاديةu   الواحدة)rs   rB   ru   rv   rC   r   r8   replacer-   r   UNITS_ORDINAL_WORDS_FEMININUNITS_ORDINAL_WORDSgetrD   rj   r	   WAWpoprH   )
rw   femininrx   ry   arbn_strtokensnew_listordinal_wordstokordinal_strings
             r   number2ordinalr     sE    	
AG}}g,,	g#		g'!9!9	GAA 	 	 	99	 }Avv 	!"? =;;D||G$$H ..H^^C  F H 4;3 !Qi-##C--Cck!"": 		! 		!C~~ei(( $!""g-'''++C55CmOOC    }''((((	"##+	+	+)***YYx((Ns   A& &
A43A4__main__)u-   مليونان وألفان وإثنا عشرu4   جاء مليونان وألفان وإثنا عشرu:   وجدت خمسمئة وثلاث وعشرون ديناراu1   خمسمئة وثلاث وعشرون ديناراh   وجدت خمسمئة وثلاثة وعشرين دينارا فاشتريت ثلاثة عشر دفتراu   لم أجد شيئاr   *   من ثلاثمئة وخمسين بلدا r   u*   من أربعمئة وخمسين بلدا u   السلام عليكم 2014r?   tashkeel)r;   r   utf8r   zdetect number word)r:   )F)1r^   
__future__r   r   r   r   rE   syssixr   rv   r	   r
   r   r   r   r   ImportErrorr:   objectr   rq   rz   r   r   r   r   r   r   r   r   r   r   r   r[   TEXTStxtr   	word_listpositions_phrasesr   
nb_phrasestag_listrH   r   r   version_infoencodeareprr_   r   r   <module>r      s                



 $ $ $ $ $ $
LLL""""!!!!OOOO   ))))))((((((	k k k k k k k k\' ' 'T  @w w w wt& & &, , ,^  &  4  D5 5 5p/ / /d$G $G $GN% % %PH H H HT z  E  8 8"EN3''	::9EE   ++C00
!>),,hdii	""###cc(I&&'''j9900;;<<f$$E(//&))****E(OOOejj$$%%%"###nhn0055667777A 8 8s   / AA