
    -iR7                         d Z ddlZddlZddlZddlZddlZdZddlm	Z	 ddl
mZ  G d d          Zd Zed	k    r e             dS dS )
z^
Arabic Dictionary Class from Arramooz Al Waseet.
Used in multiporpus morpholigical treatment
    Nzdata/arabicdictionary.sqlite   )	nountuplec                   B    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
S )ArabicDictionaryum  
    Arabic dictionary Class
    Used to allow abstract acces to lexicon of arabic language, 
    can get indexed and hashed entries from the  basic lexicon
    add also, support to extract attributtes from entries
    
    Example:
        >>> import arramooz.arabicdictionary 
        >>> mydict = arramooz.arabicdictionary.ArabicDictionary('verbs')
        >>> wordlist = [u"استقلّ", u'استقل', u"كذب"]
        >>> tmp_list = []
        >>> for word in wordlist:
        >>>     foundlist = mydict.lookup(word)
        >>>     for word_tuple in foundlist:
        >>>         word_tuple = dict(word_tuple)
        >>>         vocalized = word_tuple['vocalized']
        >>>         tmp_list.append(dict(word_tuple))
        >>> print(tmp_list)
        [{'think_trans': 1, 'passive': 0, 'confirmed': 0, 'vocalized': u'اِسْتَقَلَّ', 'stamped': u'ستقل', 'future_moode': 0, 'triliteral': 0, 'future': 0, 'unthink_trans': 0, 'past': 0, 'unvocalized': u'استقل', 'future_type': u'َ', 'double_trans': 0, 'normalized': u'استقل', 'reflexive_trans': 0, 'imperative': 0, 'transitive': 1, 'root': u'قلل', 'id': 7495},
        {'think_trans': 1, 'passive': 0, 'confirmed': 0, 'vocalized': u'كَذَبَ', 'stamped': u'كذب', 'future_moode': 0, 'triliteral': 1, 'future': 0, 'unthink_trans': 0, 'past': 0, 'unvocalized': u'كذب', 'future_type': u'كسرة', 'double_trans': 0, 'normalized': u'كذب', 'reflexive_trans': 0, 'imperative': 0, 'transitive': 1, 'root': u'كذب', 'id': 1072},
        {'think_trans': 1, 'passive': 0, 'confirmed': 0, 'vocalized': u'كَذَّبَ', 'stamped': u'كذب', 'future_moode': 0, 'triliteral': 0, 'future': 0, 'unthink_trans': 0, 'past': 0, 'unvocalized': u'كذب', 'future_type': u'َ', 'double_trans': 0, 'normalized': u'كذب', 'reflexive_trans': 0, 'imperative': 0, 'transitive': 1, 'root': u'كذب', 'id': 2869}]

    c                 4   i | _         i | _        || _        t          t          d          rt          j        }nAt          j                            t          j        	                    t                              }t          j                            |t                    }t          j                            |          rzd|z   dz   }	 t          j        |dd          | _        t          j        | j        _        | j                                        | _        nc# t(          $ r t+          d|           Y nGw xY wt+          d                    d	|d
t          j        g                              d                     t1          j        dt4          j        t4          j        t4          j        t4          j        t4          j        t4          j         t4          j!        t4          j"        t4          j#        t4          j$        dt0          j%                  | _&        dS )zw
        initialisation of dictionary from a data dictionary, create indexes 
        to speed up the access.

        frozenzfile:z?mode=roFT)check_same_threaduriz(Fatal Error Can't find the database file zInexistant Filez current dir utf8[]N)'
dictionaryattrib_num_index
table_namehasattrsysprefixospathdirnamerealpath__file__joinFILE_DBexistssqliteconnect
db_connectRowrow_factorycursorIOErrorprintcurdirencoderecompilearabyALEFYEHHAMZAALEF_HAMZA_ABOVE	WAW_HAMZA	YEH_HAMZAWAWALEF_MAKSURASHADDA
ALEF_MADDAUNICODE	stamp_pat)selfr   base	file_pathfile_uris        W/var/www/html/speakWrite/venv/lib/python3.11/site-packages/arramooz/arabicdictionary.py__init__zArabicDictionary.__init__3   s     !# % 3!! 	?:DD7??27#3#3H#=#=>>DGLLw//	7>>)$$ 	(i'
2H7"(.UX\"]"]"] /5j+"o4466	  M M M@)LLLLLM $)).	?I  v( ( ( 

		5;; 6 6	%))U%7%7uGWGWGW%Y 
 s   D D43D4c                 n    t          | d          r"| j        r| j                                         dS dS dS )z?
        Delete instance and close database connection
        r   N)r   r   close)r6   s    r:   __del__zArabicDictionary.__del__g   sN     4&& 	( (%%'''''	( 	(( (    c                     d| j         d|d}	 | j                            |           | j        r| j                                        S n# t          j        $ r Y dS w xY wdS )z Get dictionary entry by id from the dictionary
        @param idf :word identifier
        @type idf: integer
        @return: all attributes
        @rtype: dict
        select * FROM  WHERE id=''F)r   r"   executefetchallr   OperationalError)r6   idfsqls      r:   get_entry_by_idz ArabicDictionary.get_entry_by_idp   s      48???CCCH	.K$$$
 { .{++---. ' 	 	 	55	
 us   A AAc                     d| j         d|d}	 | j                            |           | j        r| j        D ]}||         c S n# t          j        $ r Y dS w xY wdS )z Get attribute value by id from the dictionary
        @param idf :word identifier
        @type idf: integer
        @param attribute :the attribute name
        @type attribute: unicode
        @return: The attribute value
        @rtype: mix.
        rA   rB   rC   F)r   r"   rD   r   rF   )r6   rG   	attributerH   rows        r:   get_attrib_by_idz!ArabicDictionary.get_attrib_by_id   s     R 48???CCCH	+K$$$
 { +; + +C	N*** ' 	 	 	55	 us   A AAc                    g }t          j        |          }d| j        d|d}	 | j                            |           | j        r| j        D ]}|                    |           |S # t          $ r g cY S t          j        $ r g cY S w xY w)uI  
        look up for all word forms in the dictionary
        @param normalized: the normalized word.
        @type normalized: unicode.
        @return: list of dictionary entries .
        @rtype: list.

        Example:
            >>> import arramooz.arabicdictionary 
            >>> import arramooz.arabicdictionary
            >>> mydict = arramooz.arabicdictionary.ArabicDictionary('verbs')
            >>> wordlist = [u"استقلّ", u'استقل', u"كذب"]
            >>> tmp_list = []
            >>> for word in wordlist:
            ...     foundlist = mydict.lookup(word)
            ...     for word_tuple in foundlist:
            ...         print(dict(word_tuple))
            ...
            {'id': 4743, 'vocalized': 'اِسْتَقَلَّ', 'unvocalized': 'استقل', 'root': 'قلل', 'normalized': 'استقل', 'stamped': 'ستقل', 'future_type': 'فتحة', 'triliteral': 0, 'transitive': 1, 'double_trans': 0, 'think_trans': 1, 'unthink_trans': 0, 'reflexive_trans': 0, 'past': 0, 'future': 0, 'imperative': 0, 'passive': 0, 'future_moode': 0, 'confirmed': 0}
            {'id': 118, 'vocalized': 'كَذَّبَ', 'unvocalized': 'كذب', 'root': 'كذب', 'normalized': 'كذب', 'stamped': 'كذب', 'future_type': 'فتحة', 'triliteral': 0, 'transitive': 1, 'double_trans': 0, 'think_trans': 1, 'unthink_trans': 0, 'reflexive_trans': 0, 'past': 0, 'future': 0, 'imperative': 0, 'passive': 0, 'future_moode': 0, 'confirmed': 0}
            {'id': 10205, 'vocalized': 'كَذَبَ', 'unvocalized': 'كذب', 'root': 'كذب', 'normalized': 'كذب', 'stamped': 'كذب', 'future_type': 'كسرة', 'triliteral': 1, 'transitive': 1, 'double_trans': 0, 'think_trans': 1, 'unthink_trans': 0, 'reflexive_trans': 0, 'past': 0, 'future': 0, 'imperative': 0, 'passive': 0, 'future_moode': 0, 'confirmed': 0}
            >>>
        rA   z WHERE normalized='rC   )	r)   normalize_hamzar   r"   rD   appendAttributeErrorr   rF   )r6   
normalizedidlistnormwordrH   rL   s         r:   lookupzArabicDictionary.lookup   s    0 (44 <@???		K$$$ { '; ' 'CMM#&&&&M  	 	 	III& 	 	 	III	s   A( (B	6B	B	c                     |                      |          }d| j        d|d}	 | j                            |           | j        rdS n# t          j        $ r Y dS w xY wdS )aK  
        look up for word if exists by using the stamp index, 
        the input word is stamped by removing infixes letters like alef, teh
        the stamped word is looked up in the stamp index
        @param word: word to look for.
        @type word: unicode.
        @return: True if exists.
        @rtype: Boolean.
        zselect id FROM  WHERE stamped='rC   TF)
word_stampr   r"   rD   r   rF   )r6   wordstamprH   s       r:   exists_as_stampz ArabicDictionary.exists_as_stamp   s     %%9=55	K$$${ t' 	 	 	55	us   !A	 	AAc                     g }|                      |          }d| j        d|d}	 | j                            |           | j        r| j        D ]}|                    |           |S # t
          j        $ r g cY S w xY w)aS  
        look up for word if exists by using the stamp index, 
        the input word is stamped by removing infixes letters like alef, teh
        the stamped word is looked up in the stamp index
        @param word: to look for.
        @type word: unicode.
        @return: list of dictionary entries IDs.
        @rtype: list.
        rA   rW   rC   )rX   r   r"   rD   rP   r   rF   )r6   rY   rS   rZ   rH   rL   s         r:   lookup_by_stampz ArabicDictionary.lookup_by_stamp   s     %%8<		K$$${ '; ' 'CMM#&&&&M' 	 	 	III	s   AA) )A=<A=c                 x    |dd         |dd         k    r
|dd         }| j                             d|          S )a  
        generate a stamp for a word, 
        remove all letters which can change form in the word :
            - ALEF, 
            - HAMZA, 
            - YEH, 
            - WAW, 
            - ALEF_MAKSURA
            - SHADDA
        @return: stamped word
        N )r5   sub)r6   rY   s     r:   rX   zArabicDictionary.word_stamp  sD     9be$$9D~!!"d+++r?   N)__name__
__module____qualname____doc__r;   r>   rI   rM   rU   r[   r]   rX    r?   r:   r   r      s         02 2 2h( ( (  44 4 4l) ) )T  (  6, , , , ,r?   r   c                  f   t          d          } g d}g }|D ]U}|                     |          }|D ];}t          |          }|d         }|                    t          |                     <Vt	          t          |                              dd                              d                     dS )z
    main test
    verbs)u   استقلّu
   استقلu   كذب	vocalizedz},z},
zunicode-escapeN)r   rU   dictrP   r$   reprreplacedecode)mydictwordlisttmp_listrY   	foundlist
word_tuplerj   s          r:   mainlyrt   +  s    
 g&&F:::HH . .MM$''	# 	. 	.Jj))J";/IOOD,,----	. 
$x..
 
 f
-
-
4
45E
F
FGGGGGr?   __main__)rf   r'   r   os.pathsqlite3r   r   r   pyarabic.arabyr)   ra   r   r   rt   rc   rg   r?   r:   <module>ry      s     
			             




)            N, N, N, N, N, N, N, N,bH H H 
FHHHHH r?   