
    '[f[                         d dl Z d dlmZ d dlT d dlT  G d de          Z G d de          Zd Z	d	 Z
ed
k    r e	              e
             dS dS )    N)util)*c                   F    e Zd Zd
dZddZddZddZddZddZdd	Z	dS )ChasenCorpusReaderutf8Nc                 N    || _         t                              | |||           d S N)_sent_splitterCorpusReader__init__)selfrootfileidsencodingsent_splitters        M/var/www/piapp/venv/lib/python3.11/site-packages/nltk/corpus/reader/chasen.pyr   zChasenCorpusReader.__init__   s*    +dD'8<<<<<    c                 b     t           fd                     |d          D                       S )Nc           
      H    g | ]\  }}t          ||d d d j                  S )FChasenCorpusViewr
   .0fileidencr   s      r   
<listcomp>z,ChasenCorpusReader.words.<locals>.<listcomp>   sB       !VS !eUE4CVWW  r   Tconcatabspathsr   r   s   ` r   wordszChasenCorpusReader.words   I       %)]]7D%A%A  
 
 	
r   c                 b     t           fd                     |d          D                       S )Nc           
      H    g | ]\  }}t          ||d ddj                  S TFr   r   s      r   r   z3ChasenCorpusReader.tagged_words.<locals>.<listcomp>   sB       !VS !dE5$BUVV  r   Tr   r    s   ` r   tagged_wordszChasenCorpusReader.tagged_words   r"   r   c                 b     t           fd                     |d          D                       S )Nc           
      H    g | ]\  }}t          ||d dd j                  S FTr   r   s      r   r   z,ChasenCorpusReader.sents.<locals>.<listcomp>%   sB       !VS !eT5$BUVV  r   Tr   r    s   ` r   sentszChasenCorpusReader.sents#   r"   r   c                 b     t           fd                     |d          D                       S )Nc           
      H    g | ]\  }}t          ||d d dj                  S r%   r   r   s      r   r   z3ChasenCorpusReader.tagged_sents.<locals>.<listcomp>-   sB       !VS !dD%ATUU  r   Tr   r    s   ` r   tagged_sentszChasenCorpusReader.tagged_sents+   r"   r   c                 b     t           fd                     |d          D                       S )Nc           
      H    g | ]\  }}t          ||d ddj                  S r)   r   r   s      r   r   z,ChasenCorpusReader.paras.<locals>.<listcomp>5   sB       !VS !eT4ATUU  r   Tr   r    s   ` r   paraszChasenCorpusReader.paras3   r"   r   c                 b     t           fd                     |d          D                       S )Nc           
      H    g | ]\  }}t          ||d d d j                  S )Tr   r   s      r   r   z3ChasenCorpusReader.tagged_paras.<locals>.<listcomp>=   sB       !VS !dD$@STT  r   Tr   r    s   ` r   tagged_paraszChasenCorpusReader.tagged_paras;   r"   r   )r   Nr	   )
__name__
__module____qualname__r   r!   r&   r*   r-   r0   r3    r   r   r   r      s        = = = =
 
 
 

 
 
 

 
 
 

 
 
 

 
 
 

 
 
 
 
 
r   r   c                   "    e Zd ZdZ	 ddZd ZdS )r   z
    A specialized corpus view for ChasenReader. Similar to ``TaggedCorpusView``,
    but this'll use fixed sets of word and sentence tokenizer.
    Nc                 x    || _         || _        || _        || _        t                              | ||           d S )Nr   )_tagged_group_by_sent_group_by_parar
   StreamBackedCorpusViewr   )r   corpus_filer   taggedgroup_by_sentgroup_by_parar   s          r   r   zChasenCorpusView.__init__J   sD     +++''kH'MMMMMr   c                    g }t          |dd          D ]w}g }g }|                                D ]}|                                dk    }|                    d          }|d         d                    |dd                   f}	|s|                    |	           |s| j        r\|                     |	          rG| j        sd |D             }| j        r|                    |           n|	                    |           g }t          |          dk    rE| j        sd	 |D             }| j        r|                    |           n|	                    |           | j        r|                    |           b|	                    |           y|S )
zReads one paragraph at a time..z^EOS\nEOS	r      Nc                     g | ]\  }}|S r7   r7   r   wts      r   r   z/ChasenCorpusView.read_block.<locals>.<listcomp>k   s    555fq!555r   c                     g | ]\  }}|S r7   r7   rI   s      r   r   z/ChasenCorpusView.read_block.<locals>.<listcomp>t   s    111&1aA111r   )read_regexp_block
splitlinesstripsplitjoinappendr
   r;   r<   extendlenr=   )
r   streamblockpara_strparasentline_eos_cellsrJ   s
             r   
read_blockzChasenCorpusView.read_blockY   s   )&$	BB "	# "	#HDD ++--  zz||u,D))AY		&* 5 56 #KKNNN D/ D4G4G4J4J < 655555* *D))))D)))D4yy1}}| 211D111D& &KK%%%%KK%%%" #T""""T""""r   r	   )r4   r5   r6   __doc__r   r]   r7   r   r   r   r   D   sJ          N N N N' ' ' ' 'r   r   c                  F   dd l } ddlm}  |dt          dd          }t	          d                    |                                dd	                              t	          d
                    d |                                dd         D                                  d S )Nr   LazyCorpusLoaderjeita.*chasenutf-8r:   /iTV  i|V  z
EOS
c              3   T   K   | ]#}d                      d |D                       V  $dS )
c              3      K   | ]?}d                      |d         |d                             d          d                   V  @dS )z{}/{}r   rG   rF      N)formatrP   )r   rJ   s     r   	<genexpr>z!demo.<locals>.<genexpr>.<genexpr>   sI      NNAgnnQqT1Q4::d+;+;A+>??NNNNNNr   N)rQ   )r   rY   s     r   rk   zdemo.<locals>.<genexpr>   sQ       
 
 IINNNNNNN
 
 
 
 
 
r   iz  i}  )nltknltk.corpus.utilra   r   printrQ   r!   r-   )rl   ra   rb   s      r   demoro      s    KKK111111W&8+PWXXXE	#((5;;==u-
.
.///	 
 
**,,T$Y7
 
 
 	
 	
    r   c                      ddl m}   | dt          dd          }t          |                                d         d         t
                    sJ d S )Nr   r`   rb   rc   rd   r:   rG   )rm   ra   r   
isinstancer&   str)ra   rb   s     r   testrs      sa    111111W&8+PWXXXEe((**1-a0#6666666r   __main__)sysnltk.corpus.readerr   nltk.corpus.reader.apinltk.corpus.reader.utilr   r   r>   r   ro   rs   r4   r7   r   r   <module>ry      s    


 # # # # # # $ $ $ $ % % % %3
 3
 3
 3
 3
 3
 3
 3
l< < < < <- < < <~   7 7 7 zDFFFDFFFFF r   