
    '[fw?                         d dl mZ d dlmZ d dlT d dlT d dlmZ d dlm	Z	  G d de
          Z G d d	          Z G d
 d          Z G d de          Z G d de          Ze G d de                      ZdS )    )total_ordering)ElementTree)*)raise_unorderable_types)Treec                   R    e Zd ZdZ	 	 	 	 	 ddZddZd Zd Zdd	Zd
 Z	d fdZ
dS )NombankCorpusReadera  
    Corpus reader for the nombank corpus, which augments the Penn
    Treebank with information about the predicate argument structure
    of every noun instance.  The corpus consists of two parts: the
    predicate-argument annotations themselves, and a set of "frameset
    files" which define the argument labels used by the annotations,
    on a per-noun basis.  Each "frameset file" contains one or more
    predicates, such as ``'turn'`` or ``'turn_on'``, each of which is
    divided into coarse-grained word senses called "rolesets".  For
    each "roleset", the frameset file provides descriptions of the
    argument roles, along with examples.
     Nutf8c                     t          |t                    rt          ||          | _        t	          |          | _        t
                              | |||           || _        || _        || _	        || _
        dS )a  
        :param root: The root directory for this corpus.
        :param nomfile: The name of the file containing the predicate-
            argument annotations (relative to ``root``).
        :param framefiles: A list or regexp specifying the frameset
            fileids for this corpus.
        :param parse_fileid_xform: A transform that should be applied
            to the fileids in this corpus.  This should be a function
            of one argument (a fileid) that returns a string (the new
            fileid).
        :param parse_corpus: The corpus containing the parse trees
            corresponding to this corpus.  These parse trees are
            necessary to resolve the tree pointers used by nombank.
        N)
isinstancestrfind_corpus_fileids_fileidslistCorpusReader__init___nomfile
_nounsfile_parse_fileid_xform_parse_corpus)selfrootnomfile
framefiles	nounsfileparse_fileid_xformparse_corpusencodings           N/var/www/piapp/venv/lib/python3.11/site-packages/nltk/corpus/reader/nombank.pyr   zNombankCorpusReader.__init__    sw    4 j#&& 	B/jAADMZ((dD*h???  ##5 )    c                      i fdd<   t                                j                   fd                      j                            S )z
        :return: a corpus view that acts as a list of
            ``NombankInstance`` objects, one for each noun in the corpus.
        Nc                     | j         k    S N)baseform)instr%   s    r    <lambda>z/NombankCorpusReader.instances.<locals>.<lambda>M   s    T]h5N r!   instance_filterc                       j         | fi S r$   )_read_instance_block)streamkwargsr   s    r    r'   z/NombankCorpusReader.instances.<locals>.<lambda>P   s    444VFFvFF r!   r   )StreamBackedCorpusViewabspathr   r   )r   r%   r,   s   ``@r    	instanceszNombankCorpusReader.instancesF   sn    
 (N(N(N(NF$%%LL''FFFFF]]4=11
 
 
 	
r!   c                     t          |                     | j                  t          |                     | j                            S )z
        :return: a corpus view that acts as a list of strings, one for
            each line in the predicate-argument annotation file.
        r-   )r.   r/   r   read_line_blockr   r   s    r    lineszNombankCorpusReader.linesT   s@    
 &LL'']]4=11
 
 
 	
r!   c                 `   |                     d          d         }|                    dd          }|                    dd                              dd          }d|z  }||                                 vrt          d	|z            |                     |                                          5 }t          j        |                                          }d
d
d
           n# 1 swxY w Y   |	                    d          D ]}|j
        d         |k    r|c S t          d| d|           )zE
        :return: the xml description for the given roleset.
        .r   	perc-sign%oneslashonezero1/10
1-slash-10frames/%s.xmlFrameset file for %s not foundNpredicate/rolesetidzRoleset z not found in )splitreplacefileids
ValueErrorr/   openr   parsegetrootfindallattrib)r   
roleset_idr%   	framefilefpetreerolesets          r    rM   zNombankCorpusReader.roleset_   sx    ##C((+##K55##$5v>>FFL
 
 $h.	DLLNN**=
JKKK \\)$$))++ 	4r%b))1133E	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4}}%899 	 	G~d#z11 2IJIIiIIJJJs   0'C##C'*C'c                    |1d|z  }||                                  vrt          d|z            |g}n|                                  }g }|D ]}|                     |                                          5 }t	          j        |                                          }ddd           n# 1 swxY w Y   |                    |                    d                     t          |          S )zA
        :return: list of xml descriptions for rolesets.
        Nr<   r=   r>   )
rB   rC   r/   rD   r   rE   rF   appendrG   LazyConcatenation)r   r%   rJ   r   rsetsrK   rL   s          r    rolesetszNombankCorpusReader.rolesetsu   s#    '(2I.. !AH!LMMM#JJJ# 	= 	=I i((--// 82#)"--55778 8 8 8 8 8 8 8 8 8 8 8 8 8 8LL':;;<<<< '''s   5'B((B,	/B,	c                     t          |                     | j                  t          |                     | j                            S )z
        :return: a corpus view that acts as a list of all noun lemmas
            in this corpus (from the nombank.1.0.words file).
        r-   )r.   r/   r   r2   r   r3   s    r    nounszNombankCorpusReader.nouns   s@    
 &LL))]]4?33
 
 
 	
r!   c                     dS )NT )r&   s    r    r'   zNombankCorpusReader.<lambda>   s     r!   c                 
   g }t          d          D ]p}|                                                                }|rFt                              || j        | j                  } ||          r|                    |           q|S )Nd   )rangereadlinestripNombankInstancerE   r   r   rO   )r   r+   r(   blockiliner&   s          r    r*   z(NombankCorpusReader._read_instance_block   s     s 	' 	'A??$$**,,D '&,,$2D4F  #?4(( 'LL&&&r!   )r
   NNNr   r$   )__name__
__module____qualname____doc__r   r0   r4   rM   rR   rT   r*   rV   r!   r    r	   r	      s         " $* $* $* $*L
 
 
 
	
 	
 	
K K K,( ( ( (*	
 	
 	
 <M;L      r!   r	   c                   r    e Zd Z	 d
dZed             Zd Zd Zd Z eed          Z	e
dd	            ZdS )r\   Nc
                     || _         	 || _        	 || _        	 || _        	 || _        	 || _        	 || _        	 t          |          | _        	 |	| _	        d S r$   )
fileidsentnumwordnumr%   sensenumber	predicatepredidtuple	argumentsr   )
r   rf   rg   rh   r%   ri   rj   rk   rm   r   s
             r    r   zNombankInstance.__init__   s     	  	& 	D !,&0"	@ *y))	 )	- 	-r!   c                     | j                             dd          }|                    dd                              dd          }| d| j         S )zThe name of the roleset used by this instance's predicate.
        Use ``nombank.roleset() <NombankCorpusReader.roleset>`` to
        look up information about the roleset.r8   r7   r:   r;   r9   r6   )r%   rA   ri   )r   rs     r    rM   zNombankInstance.roleset   sV    
 M!!#{33IIfl++33LBSTT((d&(((r!   c                 N    d                     | j        | j        | j                  S )Nz'<NombankInstance: {}, sent {}, word {}>)formatrf   rg   rh   r3   s    r    __repr__zNombankInstance.__repr__   s)    8??KLL
 
 	
r!   c                     d                     | j        | j        | j        | j        | j                  }| j        | j        dffz   }t          |          D ]\  }}|d| d| z  }|S )Nz{} {} {} {} {}rel -)	rq   rf   rg   rh   r%   ri   rm   rj   sorted)r   sitemsarglocargids        r    __str__zNombankInstance.__str__   s    ##KLLM
 
 4>5"9!;;%e}} 	& 	&OVU%V%%e%%%AAr!   c                     | j         d S | j        | j                                         vrd S | j                             | j                  | j                 S r$   )r   rf   rB   parsed_sentsrg   r3   s    r    	_get_treezNombankInstance._get_tree   sQ    $4;d/7799994 --dk::4<HHr!   zs
        The parse tree corresponding to this instance, or None if
        the corresponding tree is not available.)docc                    |                                  }t          |          dk     rt          d| z            |d d         \  }}}}}|dd          fdt                    D             }	t          |	          dk    rt          d| z            | ||          }t	          |          }t	          |          }|	d                              dd          \  }
}t
                              |
          }g }D ]J}|                     dd          \  }}|                    t
                              |          |f           Kt          |||||||||	  	        S )N   z Badly formatted nombank line: %r   c                 H    g | ]\  }}d |v 	                     |          S )z-rel)pop).0r^   pargss      r    
<listcomp>z)NombankInstance.parse.<locals>.<listcomp>  s,    FFFtq!&A++txx{{+++r!      r   rv   )	r@   lenrC   	enumerateintNombankTreePointerrE   rO   r\   )rx   r   r   piecesrf   rg   rh   r%   ri   rt   predlocrk   rj   rm   argrz   r{   r   s                    @r    rE   zNombankInstance.parse  s   v;;???!CDDD =C2A2J9'8[abbzFFFFyFFFs88q==?!CDDD )''//F g,,g,, a&,,sA..&,,W55	 	 	H 	HCIIc1--MFE066v>>FGGGG 

 

 
	
r!   r$   )NN)r`   ra   rb   r   propertyrM   rr   r|   r   treestaticmethodrE   rV   r!   r    r\   r\      s         0- 0- 0- 0-d ) ) X)
 
 
  I I I 84  D +
 +
 +
 \+
 +
 +
r!   r\   c                       e Zd ZdZd ZdS )NombankPointeran  
    A pointer used by nombank to identify one or more constituents in
    a parse tree.  ``NombankPointer`` is an abstract base class with
    three concrete subclasses:

    - ``NombankTreePointer`` is used to point to single constituents.
    - ``NombankSplitTreePointer`` is used to point to 'split'
      constituents, which consist of a sequence of two or more
      ``NombankTreePointer`` pointers.
    - ``NombankChainTreePointer`` is used to point to entire trace
      chains in a tree.  It consists of a sequence of pieces, which
      can be ``NombankTreePointer`` or ``NombankSplitTreePointer`` pointers.
    c                 B    | j         t          k    rt                      d S r$   )	__class__r   NotImplementedErrorr3   s    r    r   zNombankPointer.__init__F  s#    >^++%''' ,+r!   N)r`   ra   rb   rc   r   rV   r!   r    r   r   7  s-         ( ( ( ( (r!   r   c                   &    e Zd Zd Zd Zd Zd ZdS )NombankChainTreePointerc                     || _         d S r$   r   r   r   s     r    r   z NombankChainTreePointer.__init__L  s    	/ 	/r!   c                 J    d                     d | j        D                       S )Nr   c              3       K   | ]	}d |z  V  
dS z%sNrV   r   r   s     r    	<genexpr>z2NombankChainTreePointer.__str__.<locals>.<genexpr>S  &      66Qq666666r!   joinr   r3   s    r    r|   zNombankChainTreePointer.__str__R  %    xx66$+666666r!   c                     d| z  S )Nz<NombankChainTreePointer: %s>rV   r3   s    r    rr   z NombankChainTreePointer.__repr__U      .55r!   c                 h    t          d          t          dfd| j        D                       S )NParse tree not availablez*CHAIN*c                 :    g | ]}|                               S rV   selectr   r   r   s     r    r   z2NombankChainTreePointer.select.<locals>.<listcomp>[  #    DDD1DDDr!   rC   r   r   r   r   s    `r    r   zNombankChainTreePointer.selectX  >    <7888IDDDDDDDEEEr!   Nr`   ra   rb   r   r|   rr   r   rV   r!   r    r   r   K  sU        / / /7 7 76 6 6F F F F Fr!   r   c                   &    e Zd Zd Zd Zd Zd ZdS )NombankSplitTreePointerc                     || _         d S r$   r   r   s     r    r   z NombankSplitTreePointer.__init___  s    	3 	3r!   c                 J    d                     d | j        D                       S )N,c              3       K   | ]	}d |z  V  
dS r   rV   r   s     r    r   z2NombankSplitTreePointer.__str__.<locals>.<genexpr>e  r   r!   r   r3   s    r    r|   zNombankSplitTreePointer.__str__d  r   r!   c                     d| z  S )Nz<NombankSplitTreePointer: %s>rV   r3   s    r    rr   z NombankSplitTreePointer.__repr__g  r   r!   c                 h    t          d          t          dfd| j        D                       S )Nr   z*SPLIT*c                 :    g | ]}|                               S rV   r   r   s     r    r   z2NombankSplitTreePointer.select.<locals>.<listcomp>m  r   r!   r   r   s    `r    r   zNombankSplitTreePointer.selectj  r   r!   Nr   rV   r!   r    r   r   ^  sU        3 3 3
7 7 76 6 6F F F F Fr!   r   c                   X    e Zd ZdZd Zed             Zd Zd Zd Z	d Z
d Zd	 Zd
 ZdS )r   z@
    wordnum:height*wordnum:height*...
    wordnum:height,

    c                 "    || _         || _        d S r$   rh   height)r   rh   r   s      r    r   zNombankTreePointer.__init__x  s    r!   c                    |                      d          }t          |          dk    rt          d |D                       S |                      d          }t          |          dk    rt          d |D                       S |                      d          }t          |          dk    rt	          d| z            t          t          |d	                   t          |d                             S )
Nr   r   c                 B    g | ]}t                               |          S rV   r   rE   r   elts     r    r   z,NombankTreePointer.parse.<locals>.<listcomp>  '    AAA3#))#..AAAr!   r   c                 B    g | ]}t                               |          S rV   r   r   s     r    r   z,NombankTreePointer.parse.<locals>.<listcomp>  r   r!   :   zbad nombank pointer %rr   )r@   r   r   r   rC   r   r   )rx   r   s     r    rE   zNombankTreePointer.parse|  s     v;;??*AA&AAA  
 v;;??*AA&AAA  
 v;;!59:::!#fQi..#fQi..AAAr!   c                 $    | j          d| j         S )Nr   r   r3   s    r    r|   zNombankTreePointer.__str__  s    ,.....r!   c                 $    d| j         | j        fz  S )NzNombankTreePointer(%d, %d)r   r3   s    r    rr   zNombankTreePointer.__repr__  s    +t|T[.IIIr!   c                     t          |t          t          f          r)|j        d         }t          |t          t          f          )t          |t                    s| |u S | j        |j        k    o| j        |j        k    S Nr   )r   r   r   r   r   rh   r   r   others     r    __eq__zNombankTreePointer.__eq__  s}    !8:Q RSS 	$LOE !8:Q RSS 	$ %!344 	!5= |u},L1LLr!   c                     | |k     S r$   rV   r   s     r    __ne__zNombankTreePointer.__ne__  s    5=  r!   c                 6   t          |t          t          f          r)|j        d         }t          |t          t          f          )t          |t                    s t          |           t          |          k     S | j        | j         f|j        |j         fk     S r   )r   r   r   r   r   r?   rh   r   r   s     r    __lt__zNombankTreePointer.__lt__  s    !8:Q RSS 	$LOE !8:Q RSS 	$ %!344 	(d88bii''t{l+u}u|m.LLLr!   c                 Z    |t          d          ||                     |                   S )Nr   )rC   treeposr   s     r    r   zNombankTreePointer.select  s-    <7888DLL&&''r!   c                    |t          d          |g}g }d}	 t          |d         t                    rt          |          t          |          k     r|                    d           n|dxx         dz  cc<   |d         t          |d                   k     r(|                    |d         |d                             n||                                 |                                 nS|| j        k    r/t          |dt          |          | j        z
  dz
                     S |dz  }|                                 &)z}
        Convert this pointer to a standard 'tree position' pointer,
        given that it points to the given tree.
        Nr   r   Tr   )	rC   r   r   r   rO   r   rh   rl   r   )r   r   stackr   rh   s        r    r   zNombankTreePointer.treepos  s4   
 <7888	 %)T**  w<<#e**,,NN1%%%%BKKK1$KKK2;U2Y//LLr72;!78888 IIKKKKKMMMM dl** )I3w<<$++E+I)I!JKKKqLGIIKKK+	 r!   N)r`   ra   rb   rc   r   r   rE   r|   rr   r   r   r   r   r   rV   r!   r    r   r   p  s            B B \B*/ / /J J JM M M! ! !M M M( ( (
              r!   r   N)	functoolsr   	xml.etreer   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.internalsr   	nltk.treer   r   r	   r\   r   r   r   r   rV   r!   r    <module>r      s   % $ $ $ $ $ ! ! ! ! ! ! $ $ $ $ % % % % 2 2 2 2 2 2      P P P P P, P P PpJ
 J
 J
 J
 J
 J
 J
 J
Z( ( ( ( ( ( ( ((F F F F Fn F F F&F F F F Fn F F F$ a  a  a  a  a  a  a  a  a  a r!   