
    '[f                     x    d Z ddlZddlT dddddd	d
Z ee          Z G d d          Z G d de          ZdS )a)  
Corpus reader for the Information Extraction and Entity Recognition Corpus.

NIST 1999 Information Extraction: Entity Recognition Evaluation
https://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm

This corpus contains the NEWSWIRE development test data for the
NIST 1999 IE-ER Evaluation.  The files were taken from the
subdirectory: ``/ie_er_99/english/devtest/newswire/*.ref.nwt``
and filenames were shortened.

The corpus contains the following files: APW_19980314, APW_19980424,
APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407.
    N)*z&Associated Press Weekly, 14 March 1998z&Associated Press Weekly, 24 April 1998z&Associated Press Weekly, 29 April 1998zNew York Times, 15 March 1998zNew York Times, 3 April 1998zNew York Times, 7 April 1998)APW_19980314APW_19980424APW_19980429NYT_19980315NYT_19980403NYT_19980407c                       e Zd ZddZd ZdS )IEERDocumentN c                 L    || _         || _        || _        || _        || _        d S N)textdocnodoctype	date_timeheadline)selfr   r   r   r   r   s         K/var/www/piapp/venv/lib/python3.11/site-packages/nltk/corpus/reader/ieer.py__init__zIEERDocument.__init__+   s)    	
"     c                     | j         r-d                    | j                                                   }nAd                    d | j                                        D             d d                   dz   }| j        d| j         d|dS d|z  S )	N c                 .    g | ]}|d d         dk    |S )N   < ).0ws     r   
<listcomp>z)IEERDocument.__repr__.<locals>.<listcomp>7   s%    HHH1RaR5C<<!<<<r      z...z<IEERDocument z: >z<IEERDocument: %r>)r   joinleavesr   r   )r   r   s     r   __repr__zIEERDocument.__repr__2   s    = 	xx 4 4 6 677HH HHTY%5%5%7%7HHH"MNNQVV  :!?DJ??(????'(22r   )NNNr   )__name__
__module____qualname__r   r%   r   r   r   r   r   *   s7        ! ! ! !
3 
3 
3 
3 
3r   r   c                   4    e Zd ZdZddZddZd Zd Zd ZdS )	IEERCorpusReaderr   Nc                 b     t           fd                     |d          D                       S )Nc                 D    g | ]\  }}t          |j        |           S )encoding)StreamBackedCorpusView_read_blockr   fileidencr   s      r   r    z)IEERCorpusReader.docs.<locals>.<listcomp>D   s?       !VS 'vt/?#NNN  r   Tconcatabspathsr   fileidss   ` r   docszIEERCorpusReader.docsB   I       %)]]7D%A%A  
 
 	
r   c                 b     t           fd                     |d          D                       S )Nc                 D    g | ]\  }}t          |j        |           S r-   )r/   _read_parsed_blockr1   s      r   r    z0IEERCorpusReader.parsed_docs.<locals>.<listcomp>L   s@       !VS 'vt/FQTUUU  r   Tr4   r7   s   ` r   parsed_docszIEERCorpusReader.parsed_docsJ   r:   r   c                 F      fd                      |          D             S )Nc                 n    g | ]1}                     |          j                             |          2S r   )_parser   )r   docr   s     r   r    z7IEERCorpusReader._read_parsed_block.<locals>.<listcomp>T   sD     
 
 
{{3%1 KK111r   )r0   )r   streams   ` r   r=   z#IEERCorpusReader._read_parsed_blockR   s<    
 
 
 
''//
 
 
 	
r   c                     t           j                            |d          }t          |t                    rt          di |S t          |          S )NDOCUMENT)
root_labelr   )nltkchunkieerstr2tree
isinstancedictr   )r   rB   vals      r   rA   zIEERCorpusReader._parseZ   sP    j%%cj%AAc4   	%&&#&&&$$$r   c                 N   g }	 |                                 }|sn|                                dk    rn1|                    |           	 |                                 }|sn/|                    |           |                                dk    rnFd                    |          gS )NTz<DOC>z</DOC>
)readlinestripappendr#   )r   rC   outlines       r   r0   zIEERCorpusReader._read_blocka   s    	??$$D zz||w&&	 	

4	??$$D JJtzz||x''	 		#r   r   )	r&   r'   r(   __doc__r9   r>   r=   rA   r0   r   r   r   r*   r*   ?   sn        G
 
 
 

 
 
 

 
 
% % %         r   r*   )	rT   rG   nltk.corpus.reader.apititlessorted	documentsr   CorpusReaderr*   r   r   r   <module>rZ      s      $ $ $ $
 =<<322
 
 F6NN	3 3 3 3 3 3 3 3*5  5  5  5  5 | 5  5  5  5  5 r   