
    -iY=                         d Z ddlmZmZmZmZ ddlZddlZddlZe	dk    rej
                            d           ddlZdZddlmZ dddd	d
ddZ G d d          Zd Ze	dk    r e             dS dS )z~
Arabic Word frequency Dictionary Class from Arramooz Al Waseet.
Used in multiporpus morpholigical treatment
                
    )absolute_importprint_functionunicode_literalsdivisionN__main__z../zdata/wordfreq.sqlite               )id	vocalizedunvocalized	word_typefreqfuture_typec                   F    e Zd ZdZedfdZd Zd Zd Zd Z	dd	Z
dd
ZdS )WordFreqDictionaryuz  
        Arabic dictionary Class
        Used to allow abstract acces to lexicon of arabic language, 
        can get indexed and hashed entries from the  basic lexicon
        add also, support to extract attributtes from entries

        Example:
            >>> mydict = WordFreqDictionary('wordfreq')
            >>> wordlist = [u"صلاة", u'كرة', u"قَطَرً", u"أَرْض"]
            >>> for word in wordlist:
            >>>    print("word freq", mydict.get_freq(word))
            >>>    idlist = mydict.lookup(word)
            >>>    for row in idlist:
            >>>        row = dict(row)
            >>>        print('frequency', row['freq'])
            >>>        print(repr(row).decode("unicode-escape"))        
            صلاة  0
            word freq 0
            كرة  0
            word freq 0
            قَطَرً  2
            [(984, u'قَطَر', u'قطر', u'noun_prop', 154772, u'َ'), (13874, u'قَطَر', u'قطر', u'verb', 1859, u'َ')]
            word freq 154772
            frequency 154772
            {'vocalized': u'قَطَر', 'word_type': u'noun_prop', 'unvocalized': u'قطر', 'future_type': u'َ', 'freq': 154772, 'id': 984}
            frequency 1859
            {'vocalized': u'قَطَر', 'word_type': u'verb', 'unvocalized': u'قطر', 'future_type': u'َ', 'freq': 1859, 'id': 13874}
            أَرْض  1
            [(349, u'أَرْض', u'أرض', u'noun', 389839, u'َ')]
            word freq 389839
            frequency 389839
            {'vocalized': u'أَرْض', 'word_type': u'noun', 'unvocalized': u'أرض', 'future_type': u'َ', 'freq': 389839, 'id': 349}

    r   c                    i | _         || _        || _        i | _        | j                                        D ]}| j        |         }|| j        |<   || _        t          t          d          rt          j        }nAt          j
                            t          j
                            t                              }t          j
                            |t                    }t          j
                            |          rzd|z   dz   }	 t#          j        |dd          | _        t"          j        | j        _        | j                                        | _        nc# t.          $ r t1          d|           Y nGw xY wt1          d                    d	|d
t          j        g                              d                     d}	|                     |	           dS )zw
        initialisation of dictionary from a data dictionary, 
        create indexes to speed up the access.

        frozenzfile:z?mode=roTF)uricheck_same_threadz(Fatal Error Can't find the database file zInexistant Filez current dir utf8r   N)
dictionaryattrib_indexkey_attributeattrib_num_indexkeys
table_namehasattrsysprefixospathdirnamerealpath__file__joinFILE_DB_FREQexistssqliteconnect
db_connectRowrow_factorycursorIOErrorprintcurdirencodecreate_table_index)
selfr    r   r   kvalbase	file_pathfile_uriindex_fields
             ^/var/www/html/speakWrite/venv/lib/python3.11/site-packages/arramooz/wordfreqdictionaryclass.py__init__zWordFreqDictionary.__init__L   s    (+ " "'')) 	+ 	+A#A&C)*D!#&&$3!! 	AZDDW__RW%5%5h%?%?@@DGLL|44	7>>)$$ 	)y(3H9$*N8Y^$`$`$` 17
+ $ 6 6 8 8	  M M M@)LLLLLM 499/OY  ) ) ) $,,,,,s   E E54E5c                 n    t          | d          r"| j        r| j                                         dS dS dS )zH
        Delete instance and close database connection
        
        r.   N)r!   r.   close)r7   s    r>   __del__zWordFreqDictionary.__del__}   sN    
 4%% 	( (%%'''''	( 	(( (    c                     d| j         d|d}	 | j                            |           | j        rdS dS # t          j        $ r Y dS w xY w)z create the database index if not exists
        @param index_field: the given to be indexed field
        @type index_field: text
        @return: void
        @rtype: void
        z&create index if not exists myindex on z ()TFN)r    r1   executer,   OperationalError)r7   r=   sqls      r>   r6   z%WordFreqDictionary.create_table_index   sw      	&	K$$${ t  & 	 	 	55	s   !5 AAc                     d| j         d|d}	 | j                            |           | j        r| j                                        S n# t          j        $ r Y dS w xY wdS )z Get dictionary entry by id from the dictionary
        @param idf :word identifier
        @type idf: integer
        @return: all attributes
        @rtype: dict
        select * FROM z WHERE id=''F)r    r1   rF   fetchallr,   rG   )r7   idfrH   s      r>   get_entry_by_idz"WordFreqDictionary.get_entry_by_id   s      6:___cccJ	.K$$$ { .{++---. & 	 	 	55	 us   A AAc                     d| j         |fz  }	 | j                            |           | j        r| j        D ]}||         c S n%# t          j        $ r t          d           Y dS w xY wdS )a   Get attribute value by id from the dictionary
        @param idf :word identifier
        @type idf: integer
        @param attribute :the attribute name
        @type attribute: unicode
        @return: The attribute
        value
        @rtype: mix.
        zselect * FROM %s WHERE id='%d'z,Fatal Error on query: wordfreq dict error 12F)r    r1   rF   r,   rG   r3   )r7   rM   	attributerH   rows        r>   get_attrib_by_idz#WordFreqDictionary.get_attrib_by_id   s    X 2T_c4JJ	+K$$$
 { +; + +C	N*** & 	 	 	@AAA55	 us   A A%$A% c                    g }t          j        |dd                   r
|dd         }t          j        t           j        t           j        z   t           j        |          }|dk    rd| j        d|d}n%d| j        d|d}|dk    r|d	z  }n|d
k    r|dz  }	 | j                            |           | j        r| j        D ]}|	                    |           |S # t          j        $ r t          d           g cY S w xY w)u  
        look up for all word forms in the dictionary, according to word_type
            - 'verb': lookup for verb only.
            - 'noun': look up for nouns.
            - 'unknown': the word is not alayzed, then search for unvocalized word.
            - '': look for voaclize word without type

        
        Example:
            >>> mydict = WordFreqDictionary('wordfreq')
            >>> wordlist = [u"صلاة", u'كرة', u"قَطَرً", u"أَرْض"]
            >>> for word in wordlist:
            >>>    print("word freq", mydict.get_freq(word))
            >>>    idlist = mydict.lookup(word)
            >>>    for row in idlist:
            >>>        row = dict(row)
            >>>        print('frequency', row['freq'])
            >>>        print(repr(row).decode("unicode-escape"))        
            صلاة  0
            word freq 0
            كرة  0
            word freq 0
            قَطَرً  2
            [(984, u'قَطَر', u'قطر', u'noun_prop', 154772, u'َ'), (13874, u'قَطَر', u'قطر', u'verb', 1859, u'َ')]
            word freq 154772
            frequency 154772
            {'vocalized': u'قَطَر', 'word_type': u'noun_prop', 'unvocalized': u'قطر', 'future_type': u'َ', 'freq': 154772, 'id': 984}
            frequency 1859
            {'vocalized': u'قَطَر', 'word_type': u'verb', 'unvocalized': u'قطر', 'future_type': u'َ', 'freq': 1859, 'id': 13874}
            أَرْض  1
            [(349, u'أَرْض', u'أرض', u'noun', 389839, u'َ')]
            word freq 389839
            frequency 389839
            {'vocalized': u'أَرْض', 'word_type': u'noun', 'unvocalized': u'أرض', 'future_type': u'َ', 'freq': 389839, 'id': 349}

        @param text:vocalized word.
        @type text: unicode.
        @param word_type: the word type can take 'verb', 'noun', 'unknwon', ''.
        @type word_type: unicode.        
        @return: list of dictionary entries IDs.
        @rtype: list.
    
        NunknownrJ   z WHERE unvocalized='rK   z WHERE vocalized='verbz AND word_type='verb' nounz AND word_type!='verb' z>Fatal Error can't execute query: file: wordfrequencydictionary)araby	is_harakaresubFATHAALEFr    r1   rF   appendr,   rG   r3   )r7   textr   idlistrH   rQ   s         r>   lookupzWordFreqDictionary.lookup   sN   X  ?49%% 	9D vek%*,ej$??	!!!OOOTTT#CC "
 OOOTTT#CF""//f$$00
	K$$$
 { '; ' 'CMM#&&&&M & 	 	 	RSSSIII	s   C  D ?D c                     g }|                      ||          }|r+t          |d                                       dd          }|S dS )u  
        return the word frequency from the in the dictionary

                
        Example:
            >>> mydict = WordFreqDictionary('wordfreq')
            >>> wordlist = [u"صلاة", u'كرة', u"قَطَرً", u"أَرْض"]
            >>> for word in wordlist:
            >>>    print("word freq", mydict.get_freq(word))
            >>>    idlist = mydict.lookup(word)
            >>>    for row in idlist:
            >>>        row = dict(row)
            >>>        print('frequency', row['freq'])
            >>>        print(repr(row).decode("unicode-escape"))        
            صلاة  0
            word freq 0
            كرة  0
            word freq 0
            قَطَرً  2
            [(984, u'قَطَر', u'قطر', u'noun_prop', 154772, u'َ'), (13874, u'قَطَر', u'قطر', u'verb', 1859, u'َ')]
            word freq 154772
            frequency 154772
            {'vocalized': u'قَطَر', 'word_type': u'noun_prop', 'unvocalized': u'قطر', 'future_type': u'َ', 'freq': 154772, 'id': 984}
            frequency 1859
            {'vocalized': u'قَطَر', 'word_type': u'verb', 'unvocalized': u'قطر', 'future_type': u'َ', 'freq': 1859, 'id': 13874}
            أَرْض  1
            [(349, u'أَرْض', u'أرض', u'noun', 389839, u'َ')]
            word freq 389839
            frequency 389839
            {'vocalized': u'أَرْض', 'word_type': u'noun', 'unvocalized': u'أرض', 'future_type': u'َ', 'freq': 389839, 'id': 349}

        @param text:vocalized word.
        @type text: unicode.
        @param word_type: the word type can take 'verb', 'noun', 'unknwon', ''.
        @type word_type: unicode.        
        @return: word freq.
        @rtype: integer.
        

        r   r   )rb   dictget)r7   r`   r   ra   r   s        r>   get_freqzWordFreqDictionary.get_freq5  sP    R T9--  	q	??&&va00DK1rC   N)rS   )__name__
__module____qualname____doc__WORDFREQ_DICTIONARY_INDEXr?   rB   r6   rN   rR   rb   rf    rC   r>   r   r   (   s        ! !F 1J[h *- *- *- *-b( ( (  "  46 6 6pH H H HT2 2 2 2 2 2rC   r   c                  X   t          d          } g d}|D ]}t          d|                     |                     |                     |          }|D ]V}t	          |          }t          d|d                    t          t          |                              d                     WdS )z
    main test
    wordfreq)u   صلاةu   كرةu   قَطَرًu
   أَرْضz	word freq	frequencyr   zunicode-escapeN)r   r3   rf   rb   rd   reprdecode)mydictwordlistwordra   rQ   s        r>   mainlyru   h  s      
++FGGGH 6 6k6??400111t$$ 	6 	6Cs))C+s6{+++$s))""#3445555	66 6rC   )rj   
__future__r   r   r   r   r[   r"   r$   rg   r%   r_   sqlite3r,   r*   pyarabic.arabyrY   rk   r   ru   rl   rC   r>   <module>ry      s,   
            
			 (//%    &      	         @
6 6 6  
FHHHHH rC   