
    -i5              	          d Z dZddlmZ 	 ddlmZmZ ddlmZ n#  ddlmZmZ ddlmZ Y nxY w	 ddl	m
Z
 n#  ddl	m
Z
 Y nxY w G d d	          Zd
 Zedk    rddlZddlmZ g dZeD ]=\  ZZ ee          Zeek    r' ededede                    d                     > e e e                                  e e e                                  e e ed                                e e ed                                e e ed                                e e ed                               dS dS )z0.4.3zTaha Zerrouki    N)	STOPWORDSSTOPWORDS_INDEX)r      )stopwordTuplec                       e Zd ZdZd Zd Zd Zd Zd Zd Z	dd	Z
dd
Zd ZddZddZd ZddZddZddZddZd Zd Zd Zd Zd Zd ZdS )stopwords_lexiconz<
    A lexicon class for stopwords extracttion features
    c                     t           | _        t          | _        |                     | j                  | _        |                     | j                  | _        |                                 | _        d S )N)	r   
forms_dictclassed_STOPWORDSlemmas_dictcreate_vocalized_indexvocalized_lemmas_dictvocalized_forms_dictcreate_categories_index
categoriesselfs    _/var/www/html/speakWrite/venv/lib/python3.11/site-packages/arabicstopwords/stopwords_lexicon.py__init__zstopwords_lexicon.__init__,   sY    $,%)%@%@AQ%R%R"%)%@%@%Q%Q!6688    c                     i }|D ]J}||         D ]?}|                     dd          }|r%||v r||                             |           |g||<   @K|S )z2
        Create index of vocalized lemmas
        	vocalized )getappend)r   table	voca_dict	unvoc_keyitemr   s         r   r   z(stopwords_lexicon.create_vocalized_index3   s     	 	3 	3Ii( 3 3 HH[44	 3 I--!),33D999,07Ii(3 r   c           
         i }| j         df| j        dffD ]\  }}|D ]}||         D ]}|                    dd          }|                    d|                    dd                    }|rq||vri ||<   |||         vrg ||         |<   g ||         |dz   <   ||         |                             |           ||         |dz                                |           |S )z,
        Create index of categories
        formslemmasr   r   	type_wordtype
_vocalized)r
   r   r   r   )r   
index_dictr   
table_typer   r   r   categorys           r   r   z)stopwords_lexicon.create_categories_indexA   s6    
#'?G"<t?OQY>Z!\ 	X 	XE:" X X	!), X XD $R 8 8I $TXXfR5H5H I IH X#:5535Jx0%Z-AAA?AJx0<LNJx0L1HI"8,Z8??	JJJ"8,Z-DELLYWWWXX r   c                     || j         v S )z test if word is a stop)r
   r   words     r   is_stopzstopwords_lexicon.is_stopX   s    t&&r   c                 ,    |                      |          S )z return all stems for a word	get_stemsr*   s     r   stop_stemlistzstopwords_lexicon.stop_stemlist\   s    ~~d###r   c                 D    |                      |          }|r|d         S dS )z retrun a stem of a stop word r   r   r.   r   r+   stemlists      r   	stop_stemzstopwords_lexicon.stop_stema   s*    >>$'' 	A;2r   Fc                     |s&t          | j                                                  S g }| j                                        D ]*}|                    |                     |                     +|S )z return all arabic stopwords)listr
   keysextendget_vocalizedsr   r   vocalized_listxs       r   stopwords_listz stopwords_lexicon.stopwords_listi   ss     	",,..///N_))++ > >%%d&9&9!&<&<====!!r   c                     |s&t          | j                                                  S g }| j                                        D ]*}|                    |                     |                     +|S )z( return all arabic classified  stopwords)r6   r   r7   r8   r9   r:   s       r   classed_stopwords_listz(stopwords_lexicon.classed_stopwords_listt   su     	"(--//000N%**,, > >%%d&9&9!&<&<====!!r   c                 F    |t           v rd t           |         D             S g S )z! return all forms for a stop wordc                     g | ]}|S  rB   .0ds     r   
<listcomp>z4stopwords_lexicon.stopword_forms.<locals>.<listcomp>   s    666!A666r   )r   r*   s     r   stopword_formsz stopwords_lexicon.stopword_forms   s+    ?""66t46666Ir   c                 x    |s| j                             |i           }n| j                            |i           }|S z:
         return the all features for  a stopword
        )r
   r   r   r   r+   lemmar3   s       r   get_features_dictz#stopwords_lexicon.get_features_dict   sB      	5**433HH'++D44Hr   c                 B   |s:|r| j                             |g           }nU| j                            |g           }n9|r| j                            |g           }n| j                            |g           }g }|D ]$}|                    t          |                     %|S rI   )r   r   r
   r   r   r   r   )r   r+   rK   r   r3   stoptuple_listr   s          r   get_stopwordtuplesz$stopwords_lexicon.get_stopwordtuples   s      
	: 9488rBB?..tR88  :599$CC+//b99 	6 	6D  t!4!45555r   c                 N    t          | j                                                  S )zI
        Get all categories (wordtypes available in the lexicon)
        )r6   r   r7   r   s    r   get_categoriesz stopwords_lexicon.get_categories   s      DO((**+++r   r   c                     d}|rd}nd}|r|dz  }| j                             |i                               |g           S )zI
        Get all stopwords  (wordtypes available in the lexicon)
        r   r"   r!   r%   )r   r   )r   r(   rK   r   	secondkeys        r   get_by_categoryz!stopwords_lexicon.get_by_category   sW     	 	  III 	&%I""8R0044YCCCr   c                     |s(fd| j                             |i           D             }n'fd| j                            |i           D             }t          t	          |                    }|S )z<
         return the asked feature form  a stopword
        c                 h    g | ].}|                     d           |                     d           /S r   r   rD   rE   features     r   rF   z1stopwords_lexicon.get_feature.<locals>.<listcomp>   s?    eeeaSTSXSXY`acSdSdegb))eeer   c                 h    g | ].}|                     d           |                     d           /S rW   rX   rY   s     r   rF   z1stopwords_lexicon.get_feature.<locals>.<listcomp>   s?    fffaTUTYTYZabdTeTefgb))fffr   r
   r   r   r6   set)r   r+   rZ   rK   r3   s     `  r   get_featurezstopwords_lexicon.get_feature   s      	geeee4?3F3FtB3O3OeeeHHffff43C3G3GR3P3PfffHH&&r   c                     |s&d | j                             |i           D             }n%d | j                            |i           D             }t          t	          |                    }|S )z:
         return the vocalized form fo a stopword
        c                 f    g | ].}|                     d d          |                     d d          /S r   r   rX   rC   s     r   rF   z4stopwords_lexicon.get_vocalizeds.<locals>.<listcomp>   s>    mmm!WXW\W\]hikWlWlmk"--mmmr   c                 f    g | ].}|                     d d          |                     d d          /S ra   rX   rC   s     r   rF   z4stopwords_lexicon.get_vocalizeds.<locals>.<listcomp>   s>    ooo!XYX]X]^ijlXmXmok"--ooor   r\   rJ   s       r   r9   z stopwords_lexicon.get_vocalizeds   st      	pmmt7J7J4PR7S7SmmmHHoot7G7K7KDQS7T7ToooHH&&r   c                     |s&d | j                             |i           D             }n%d | j                            |i           D             }t          t	          |                    }|S )z9
         return the wordtype form fo a stopword
        c                 f    g | ].}|                     d d          |                     d d          /S )r$   r   rX   rC   s     r   rF   z3stopwords_lexicon.get_wordtypes.<locals>.<listcomp>   s>    cccQRSRWRWX^_aRbRbcfR((cccr   c                 f    g | ].}|                     d d          |                     d d          /S )r#   r   rX   rC   s     r   rF   z3stopwords_lexicon.get_wordtypes.<locals>.<listcomp>   s>    nnn!XYX]X]^ijlXmXmnk"--nnnr   r\   rJ   s       r   get_wordtypeszstopwords_lexicon.get_wordtypes   ss      	occ$/2E2Ed22N2NcccHHnnt7G7K7KDQS7T7TnnnHH&&r   c                     d | j                             |i           D             }t          t          |                    }|S )z>
        return the word sub class form fo a stopword
        c                 f    g | ].}|                     d d          |                     d d          /S )
class_wordr   rX   rC   s     r   rF   z3stopwords_lexicon.get_wordclass.<locals>.<listcomp>   s>    lllqUVUZUZ[ghjUkUklAEE,r**lllr   )r   r   r6   r]   r2   s      r   get_wordclasszstopwords_lexicon.get_wordclass   sC     mlD4D4H4Hb4Q4QlllH&&r   c                     d | j                             |i           D             }t          t          |                    }|S )z6
         return True if the word  get tags, 
        c                 f    g | ].}|                     d d          |                     d d          /S )tagsr   rX   rC   s     r   rF   z.stopwords_lexicon.get_tags.<locals>.<listcomp>Z  ;    ___aeeTZ[]N^N^_AEE&$$___r   r
   r   r6   r]   r2   s      r   get_tagszstopwords_lexicon.get_tagsV  B     `_do.A.A$r.J.J___H&&r   c                     d | j                             |i           D             }t          t          |                    }|S )z7
         return True if the word  get stems, 
        c                 f    g | ].}|                     d d          |                     d d          /S )stemr   rX   rC   s     r   rF   z/stopwords_lexicon.get_stems.<locals>.<listcomp>c  rn   r   ro   r2   s      r   r/   zstopwords_lexicon.get_stems_  rq   r   c                     d | j                             |i           D             }t          t          |                    }|S )z(
         return get encletic, 
        c                 f    g | ].}|                     d d          |                     d d          /S )encleticr   rX   rC   s     r   rF   z3stopwords_lexicon.get_enclitics.<locals>.<listcomp>l  >    gggQRSRWRWXbceRfRfgAEE*R((gggr   ro   r2   s      r   get_encliticszstopwords_lexicon.get_encliticsh  B     hg$/2E2Ed22N2NgggH&&r   c                     d | j                             |i           D             }t          t          |                    }|S )z*
         return  get procletic, 
        c                 f    g | ].}|                     d d          |                     d d          /S )	procleticr   rX   rC   s     r   rF   z4stopwords_lexicon.get_procletics.<locals>.<listcomp>u  s>    iiiaSTSXSXYdegShShiAEE+b))iiir   ro   r2   s      r   get_procleticsz stopwords_lexicon.get_procleticsq  sB     ji4?3F3FtB3O3OiiiH&&r   c                     d | j                             |i           D             }t          t          |                    }|S )z'
         return   get lemma, 
        c                 f    g | ].}|                     d d          |                     d d          /S )originalr   rX   rC   s     r   rF   z0stopwords_lexicon.get_lemmas.<locals>.<listcomp>~  rx   r   ro   r2   s      r   
get_lemmaszstopwords_lexicon.get_lemmasz  rz   r   N)F)FF)r   FF)__name__
__module____qualname____doc__r   r   r   r,   r0   r4   r=   r?   rG   rL   rO   rQ   rT   r^   r9   rf   rj   rp   r/   ry   r~   r   rB   r   r   r   r   (   s        9 9 9    .' ' '$ $ $
  " " " "" " " "        *, , ,D D D D(	 	 	 		 	 	 		 	 	 	  d            r   r   c                 B    d}t          t          |                     dS )Nu
   لعلهمr   )printr4   )argsr+   s     r   mainr     s     D	)D//1r   __main__)arepr))u   منكمT)u   ممكنF)u
   عندماT)u
   حينئذTzError z is zwhere must be utf8u   حتىu   جميعu   لجميعهمu   لجم)__version__
__author__pyarabic.arabyarabystopwordsallformsr   r   stopwords_classifiedr   stopwordtupler   r   r   r   syspyarabic.arabreprr   wordswrepr,   resultr   encodelenr=   r?   rG   r4   rB   r   r   <module>r      s  2 
      E<<<<<<<<CCCCCCCE========DDDDDDDD-+++++++-,,,,,,,,X X X X X X X Xv
  
 zJJJ''''''  E
  W W3S==EEaaaEMMfUUVVV	E##nn

   	E##$$&&
'
'(((	E%%y))
*
*+++	E%%{++
,
,---	E%%		+,,
-
-...	E%%		)$$
%
%&&&&&% s    -8 A