
    -i4                         d Z ddlmZmZmZmZ ddlZddlZddlZ	dZ
ddlmZ  G d d          Zd Zedk    r e             dS dS )	zh
Arabic Stop Word Dictionary Class from Arramooz Al Waseet.
Used in multiporpus morpholigical treatment
    )absolute_importprint_functionunicode_literalsdivisionNzdata/stopwords.sqlitec                   >    e Zd ZdZddZd Zd Zd Zd Zd Z	d	 Z
d
S )StopWordsDictionaryu  
        Arabic dictionary Class
        Used to allow abstract acces to lexicon of arabic language, 
        can get indexed and hashed entries from the  basic lexicon
        add also, support to extract attributtes from entries
        
        Example:
            >>> mydict = StopWordsDictionary('classedstopwords')
            >>> wordlist = [u"بعضهما", u'في', u"منً", u"أن", u'عندما']
            >>> tmp_list =[]
            >>> for word in wordlist:
            >>>    print("-------")
            >>>    print("word looked up ", mydict.is_stopword(word))
            >>>    idlist = mydict.lookup(word)
            >>>    for word_tuple in idlist:
            >>>        tmp_list.append(dict(word_tuple))
            >>> print(tmp_list)
            [{'definition': 0, 'qasam': 1, 'object_type': u'اسم', 'vocalized': u'فِي', 'conjonction': 1, 'pronoun': 1, 'defined': 0, 'interrog': 1, 'is_inflected': 0, 'word_type': u'حرف', 'preposition': 0, 'need': u'', 'conjugation': 0, 'word_class': u'حرف جر', 'action': u'جار', 'WORD': u'في', 'ID': 203, 'tanwin': 0},
             {'definition': 0, 'qasam': 0, 'object_type': u'اسم', 'vocalized': u'فِي', 'conjonction': 1, 'pronoun': 1, 'defined': 0, 'interrog': 0, 'is_inflected': 0, 'word_type': u'اسم', 'preposition': 1, 'need': u'', 'conjugation': 0, 'word_class': u'الأسماء الخمسة', 'action': u'جار', 'WORD': u'في', 'ID': 304, 'tanwin': 0},
             {'definition': 0, 'qasam': 1, 'object_type': u'فعل', 'vocalized': u'أَنْ', 'conjonction': 1, 'pronoun': 0, 'defined': 0, 'interrog': 0, 'is_inflected': 0, 'word_type': u'حرف', 'preposition': 1, 'need': u'', 'conjugation': 0, 'word_class': u'حرف نصب', 'action': u'ناصب', 'WORD': u'أن', 'ID': 168, 'tanwin': 0},
             {'definition': 0, 'qasam': 0, 'object_type': u'اسم', 'vocalized': u'أَنَّ', 'conjonction': 1, 'pronoun': 1, 'defined': 0, 'interrog': 0, 'is_inflected': 0, 'word_type': u'حرف', 'preposition': 1, 'need': u'', 'conjugation': 0, 'word_class': u'إن و أخواتها', 'action': u'ناصب', 'WORD': u'أن', 'ID': 481, 'tanwin': 0}]
            
    unvocalizedc                 0   i | _         || _        || _        t          t          d          rt          j        }nAt          j                            t          j        	                    t                              }t          j                            |t                    }t          j                            |          rd|z   dz   }	 t          j        |dd          | _        t          j        | j        _        | j                                        | _        nh# t          j        $ r t+          d|           Y nGw xY wt+          d                    d	|d
t          j        g                              d                     d}|                     |           dS )zw
        initialisation of dictionary from a data dictionary, 
        create indexes to speed up the access.

        frozenzfile:z?mode=roFT)check_same_threaduriz(Fatal Error Can't find the database file zInexistant Filez current dir utf8r	   N)
dictionarykey_attribute
table_namehasattrsysprefixospathdirnamerealpath__file__joinFILE_DB_FREQexistssqliteconnect
db_connectRowrow_factorycursorOperationalErrorprintcurdirencodecreate_table_index)selfr   r   base	file_pathfile_uriindex_fields          _/var/www/html/speakWrite/venv/lib/python3.11/site-packages/arramooz/stopwordsdictionaryclass.py__init__zStopWordsDictionary.__init__6   st    +
 %3!! 	AZDDW__RW%5%5h%?%?@@DGLL|44	7>>)$$ 	)y(3H9$*N8eY]$^$^$^
 17
+ $ 6 6 8 8	 * M M M@)LLLLLM 499/OY  ) ) ) $,,,,,s   D D98D9c                 J    | j         r| j                                          dS dS )zH
        Delete instance and close database connection
        
        N)r    close)r)   s    r.   __del__zStopWordsDictionary.__del__`   s2    
 ? 	$O!!#####	$ 	$    c                     d| j         d|d}	 | j                            |           | j        rdS dS # t          j        $ r Y dS w xY w)z create the database index if not exists
        @param index_field: the given to be indexed field
        @type index_field: text
        @return: void
        @rtype: void
        z&create index if not exists myindex on z ()TFNr   r#   executer   r$   )r)   r-   sqls      r.   r(   z&StopWordsDictionary.create_table_indexj   sw      	&	K$$${ t  & 	 	 	55	s   !5 AAc                    d| j         d|d}	 | j                            |           | j        r?| j        D ]7}i }| j        D ](}| j        |         }||         r||         ||<   #d||<   )|c S n# t          j        $ r Y dS w xY wdS )z Get dictionary entry by id from the dictionary
        @param idf: word identifier
        @type idf: integer
        @return: all attributes
        @rtype: dict
        select * FROM  WHERE id = '' F)r   r#   r7   attrib_num_indexr   r$   )r)   idfr8   row
entry_dictnum_keytext_keys          r.   get_entry_by_idz#StopWordsDictionary.get_entry_by_id|   s      8<L	K$$${ &; & &C!#J#'#8 ; ;%)%:7%Cw< ;58\Jx007:j22%%%%& 	 	 	55	us   AA1 1BBc                     d| j         d|d}	 | j                            |           | j        r| j        D ]}||         c S n# t          j        $ r Y dS w xY wdS )a   Get attribute value by id from the dictionary
        @param idf : word identifier
        @type idf: integer
        @param attribute:the attribute name
        @type attribute: unicode
        @return: The attribute
        value
        @rtype: mix.
        r:   r;   r<   Fr6   )r)   r?   	attributer8   r@   s        r.   get_attrib_by_idz$StopWordsDictionary.get_attrib_by_id   s     P 8<L	K$$${ +; + +C	N***& 	 	 	55	us   3A AAc                    g }d| j         d|d}	 | j                            |           n5# t          j        $ r#}t          d|j        d|z             Y d}~nd}~ww xY w| j        r| j        D ]}|                    |           |S )uU  
        look up for all word forms in the dictionary

        Example:
            >>> mydict = StopWordsDictionary('classedstopwords')
            >>> wordlist = [u"بعضهما", u'في', u"منً", u"أن", u'عندما']
            >>> tmp_list =[]
            >>> for word in wordlist:
            >>>    print("-------")
            >>>    print("word looked up ", mydict.is_stopword(word))
            >>>    idlist = mydict.lookup(word)
            >>>    for word_tuple in idlist:
            >>>        tmp_list.append(dict(word_tuple))
            >>> print(tmp_list)
            [{'definition': 0, 'qasam': 1, 'object_type': u'اسم', 'vocalized': u'فِي', 'conjonction': 1, 'pronoun': 1, 'defined': 0, 'interrog': 1, 'is_inflected': 0, 'word_type': u'حرف', 'preposition': 0, 'need': u'', 'conjugation': 0, 'word_class': u'حرف جر', 'action': u'جار', 'WORD': u'في', 'ID': 203, 'tanwin': 0},
             {'definition': 0, 'qasam': 0, 'object_type': u'اسم', 'vocalized': u'فِي', 'conjonction': 1, 'pronoun': 1, 'defined': 0, 'interrog': 0, 'is_inflected': 0, 'word_type': u'اسم', 'preposition': 1, 'need': u'', 'conjugation': 0, 'word_class': u'الأسماء الخمسة', 'action': u'جار', 'WORD': u'في', 'ID': 304, 'tanwin': 0},
             {'definition': 0, 'qasam': 1, 'object_type': u'فعل', 'vocalized': u'أَنْ', 'conjonction': 1, 'pronoun': 0, 'defined': 0, 'interrog': 0, 'is_inflected': 0, 'word_type': u'حرف', 'preposition': 1, 'need': u'', 'conjugation': 0, 'word_class': u'حرف نصب', 'action': u'ناصب', 'WORD': u'أن', 'ID': 168, 'tanwin': 0},
             {'definition': 0, 'qasam': 0, 'object_type': u'اسم', 'vocalized': u'أَنَّ', 'conjonction': 1, 'pronoun': 1, 'defined': 0, 'interrog': 0, 'is_inflected': 0, 'word_type': u'حرف', 'preposition': 1, 'need': u'', 'conjugation': 0, 'word_class': u'إن و أخواتها', 'action': u'ناصب', 'WORD': u'أن', 'ID': 481, 'tanwin': 0}]
            
        @param text:vocalized word.
        @type text: unicode.
        @return: list of dictionary entries IDs.
        @rtype: list.
        r:   z WHERE word = 'r<   zer:z%Fatal error in query: stopwords on %sN)r   r#   r7   r   Errorr%   messageappend)r)   textidlistr8   err@   s         r.   lookupzStopWordsDictionary.lookup   s    2 
///444!	RK$$$$ | 	R 	R 	R%%LS%PQQQQQQQQ	R ; 	#{ # #c""""s   - AAAc                 <    g }|                      |          }|rdS dS )z
        return the word frequency from the in the dictionary
        @param text:vocalized word.
        @type text: unicode.
        @return: word freq.
        @rtype: integer.
        TF)rO   )r)   rL   rM   s      r.   is_stopwordzStopWordsDictionary.is_stopword   s,     T"" 	45r3   N)r	   )__name__
__module____qualname____doc__r/   r2   r(   rD   rG   rO   rQ    r3   r.   r   r      s         0(- (- (- (-T$ $ $  $  >1 1 1f( ( (T    r3   r   c                     t          d          } g d}g }|D ]p}t          d           t          d|                     |                     |                     |          }|D ]$}|                    t          |                     %qt          t          |                              dd                              d                     dS )	z
    Test main classedstopwords)u   بعضهماu   فيu   منًu   أنu
   عندماz-------zword looked up z},z},
zunicode-escapeN)	r   r%   rQ   rO   rK   dictreprreplacedecode)mydictwordlisttmp_listwordrM   
word_tuples         r.   mainlyrb   
  s     !!344FLLLHH . .i!3!3D!9!9:::t$$  	. 	.JOOD,,----	. 
$x..
 
 f
-
-
4
45E
F
FGGGGGr3   __main__)rU   
__future__r   r   r   r   r   r   sqlite3r   r   pyarabic.arabyarabyr   rb   rR   rV   r3   r.   <module>rh      s                    '      j j j j j j j jZH H H 
FHHHHH r3   