
    '[f                     x    d dl Z d dlmZ d dlmZ d dlmZ  G d de          Zd Ze	dk    r e             dS dS )	    N)defaultdict)reduce)CorpusReaderc                   x     e Zd ZdZ ej        d          Zed             Zd fd	Z	ddZ
ddZdd	Zd
 Z xZS )LinThesaurusCorpusReaderzEWrapper for the LISP-formatted thesauruses distributed by Dekang Lin.z \("?([^"]+)"? \(desc [0-9.]+\).+c                  *    t          t                    S )z6Factory for creating defaultdict of defaultdict(dict)s)r   dict     J/var/www/piapp/venv/lib/python3.11/site-packages/nltk/corpus/reader/lin.py__defaultdict_factoryz.LinThesaurusCorpusReader.__defaultdict_factory   s     4   r           c                 z   t                                          |d           t          t          j                  | _        || _        |                     dd          D ]\  }}}t          |          5 }d}|D ]}|	                                }|r#t          j
                            d|          }	d};|dk    rd}D|                    d          }
t          |
          dk    r;|
\  }}t          |          | j        |         |	         |	                    d	          <   	 d
d
d
           n# 1 swxY w Y   d
S )a  
        Initialize the thesaurus.

        :param root: root directory containing thesaurus LISP files
        :type root: C{string}
        :param badscore: the score to give to words which do not appear in each other's sets of synonyms
        :type badscore: C{float}
        zsim[A-Z]\.lspT)include_encodinginclude_fileidz\1Fz))	   "N)super__init__r   r   ._LinThesaurusCorpusReader__defaultdict_factory
_thesaurus	_badscoreabspathsopenstrip_key_resubsplitlenfloat)selfrootbadscorepathencodingfileidlin_filefirstlinekey
split_linengramscore	__class__s                r   r   z!LinThesaurusCorpusReader.__init__   s    	/000%&>&TUU!&*mm!$ '4 '
 '
 	 	"D(F d x$  D::<<D 6>BB5$OO % $ &*ZZ%5%5
z??a//+5LE5MR %N NDOF3C8S9I9IJ              	 	s   5B-D//D3	6D3	Nc                      k    r|rdS d  j         D             S |r5 j        |                  v r j        |                           n j        S  fd j         D             S )a  
        Returns the similarity score for two ngrams.

        :param ngram1: first ngram to compare
        :type ngram1: C{string}
        :param ngram2: second ngram to compare
        :type ngram2: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, just the score for the two ngrams; otherwise,
                 list of tuples of fileids and scores.
              ?c                     g | ]}|d fS )r1   r
   ).0fids     r   
<listcomp>z7LinThesaurusCorpusReader.similarity.<locals>.<listcomp>Q   s    <<<sc
<<<r   c                 ~    g | ]9}|j         |                  v rj         |                           nj        f:S r
   )r   r   )r3   r4   ngram1ngram2r"   s     r   r5   z7LinThesaurusCorpusReader.similarity.<locals>.<listcomp>Z   sg     
 
 
    &)=f)EEE !OC08@@!%
 
 
r   )_fileidsr   r   )r"   r7   r8   r'   s   ``` r   
similarityz#LinThesaurusCorpusReader.similarity?   s     V =s<<dm<<<<  !8!@@@ OF+F3F;;
 
 
 
 
 
  $}
 
 
 
r   c                 |     |r% j         |                                                  S  fd j        D             S )a   
        Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, list of tuples of scores and synonyms; otherwise,
                 list of tuples of fileids and lists, where inner lists consist of tuples of
                 scores and synonyms.
        c                 ^    g | ])}|j         |                                                  f*S r
   )r   itemsr3   r'   r-   r"   s     r   r5   z<LinThesaurusCorpusReader.scored_synonyms.<locals>.<listcomp>u   sF        07==??@  r   )r   r=   r9   r"   r-   r'   s   `` r   scored_synonymsz(LinThesaurusCorpusReader.scored_synonymsf   s^      	?6*5177999    "m   r   c                 |     |r% j         |                                                  S  fd j        D             S )a  
        Returns a list of synonyms for the current ngram.

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and
                 lists, where inner lists contain synonyms.
        c                 ^    g | ])}|j         |                                                  f*S r
   )r   keysr>   s     r   r5   z5LinThesaurusCorpusReader.synonyms.<locals>.<listcomp>   sF        07<<>>?  r   )r   rC   r9   r?   s   `` r   synonymsz!LinThesaurusCorpusReader.synonymsz   s^      	?6*5166888    "m   r   c                 :     t           fd j        d          S )z
        Determines whether or not the given ngram is in the thesaurus.

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :return: whether the given ngram is in the thesaurus.
        c                 &    | pj         |         v S N)r   )accumr'   r-   r"   s     r   <lambda>z7LinThesaurusCorpusReader.__contains__.<locals>.<lambda>   s    %"MET_V5L,L r   F)r   r9   )r"   r-   s   ``r   __contains__z%LinThesaurusCorpusReader.__contains__   s/     MMMMMM
 
 	
r   )r   rG   )__name__
__module____qualname____doc__recompiler   staticmethodr   r   r:   r@   rD   rJ   __classcell__)r/   s   @r   r   r      s        OO bj<==G! ! \!" " " " " "H% % % %N   (   &
 
 
 
 
 
 
r   r   c                  4   ddl m}  d}d}t          d|z              t          |                     |                     t          d|z              t          |                     |                     t          d|z              t          |                     |d	                     t          d|z              t          |                     |d	                     t          d
| d| d           t          |                     ||                     d S )Nr   )lin_thesaurusbusiness
enterprisezGetting synonyms for zGetting scored synonyms for z5Getting synonyms from simN.lsp (noun subsection) for zsimN.lsp)r'   zSimilarity score for z and :)nltk.corpusrT   printrD   r@   r:   )thesword1word2s      r   demor]      s$   111111EE	
!E
)***	$--

	
(5
0111	$

u
%
%&&&	
AE
IJJJ	$--j-
1
1222	
AE
IJJJ	$--j-
1
1222	
6%
6
6e
6
6
6777	$//%
'
'(((((r   __main__)
rO   collectionsr   	functoolsr   nltk.corpus.readerr   r   r]   rK   r
   r   r   <module>rb      s    
			 # # # # # #       + + + + + +J
 J
 J
 J
 J
| J
 J
 J
d) ) )* zDFFFFF r   