
    '[f<c                     N    d Z ddlZddlZddlmZ ddlmZ  G d de          ZdS )z
An NLTK interface to the VerbNet verb lexicon

For details about VerbNet see:
https://verbs.colorado.edu/~mpalmer/projects/verbnet.html
    N)defaultdict)XMLCorpusReaderc                   <   e Zd ZdZd$dZ ej        d          Z	  ej        d          Z	  ej        d          Z		 d%dZ
d%d	Zd&d
Zd Zd%dZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd'dZd'dZd'dZd'dZd'dZ d'd Z!d'd!Z"d'd"Z#d'd#Z$dS )(VerbnetCorpusReadera  
    An NLTK interface to the VerbNet verb lexicon.

    From the VerbNet site: "VerbNet (VN) (Kipper-Schuler 2006) is the largest
    on-line verb lexicon currently available for English. It is a hierarchical
    domain-independent, broad-coverage verb lexicon with mappings to other
    lexical resources such as WordNet (Miller, 1990; Fellbaum, 1998), XTAG
    (XTAG Research Group, 2001), and FrameNet (Baker et al., 1998)."

    For details about VerbNet see:
    https://verbs.colorado.edu/~mpalmer/projects/verbnet.html
    Fc                     t          j        | |||           t          t                    | _        	 t          t                    | _        	 i | _        	 i | _        |                                  d S N)	r   __init__r   list_lemma_to_class_wordnet_to_class_class_to_fileid_shortid_to_longid_quick_index)selfrootfileids
wrap_etrees       N/var/www/piapp/venv/lib/python3.11/site-packages/nltk/corpus/reader/verbnet.pyr	   zVerbnetCorpusReader.__init__%   sy     tWjAAA*400	& "-T!2!2	/ !#	B #%
 	    z([^\-\.]*)-([\d+.\-]+)$z
[\d+.\-]+$zH<MEMBER name="\??([^"]+)" wn="([^"]*)"[^>]+>|<VNSUBCLASS ID="([^"]+)"/?>Nc                     |&t          | j                                                  S t          |t                    r|                     |          }d |                    d          D             S )zx
        Return a list of all verb lemmas that appear in any class, or
        in the ``classid`` if specified.
        Nc                 8    g | ]}|                     d           S )nameget.0members     r   
<listcomp>z.VerbnetCorpusReader.lemmas.<locals>.<listcomp>S   s$    WWW6FJJv&&WWWr   MEMBERS/MEMBER)sortedr   keys
isinstancestrvnclassfindallr   r$   s     r   lemmaszVerbnetCorpusReader.lemmasH   sl    
 ?$.3355666 '3'' 0,,w//WWW__EU5V5VWWWWr   c                     |&t          | j                                                  S t          |t                    r|                     |          }t          d |                    d          D             g           S )z|
        Return a list of all wordnet identifiers that appear in any
        class, or in ``classid`` if specified.
        Nc              3   f   K   | ],}|                     d d                                          V  -dS )wn N)r   splitr   s     r   	<genexpr>z1VerbnetCorpusReader.wordnetids.<locals>.<genexpr>a   sP         JJtR((..00     r   r   )r    r   r!   r"   r#   r$   sumr%   r&   s     r   
wordnetidszVerbnetCorpusReader.wordnetidsU   s    
 ?$05577888 '3'' 0,,w// ")//2B"C"C     r   c                 F   %fd| j                                         D             S || j        |         S || j        |         S |4|                     |          }d |                    d          D             S t          | j                                                   S )a  
        Return a list of the VerbNet class identifiers.  If a file
        identifier is specified, then return only the VerbNet class
        identifiers for classes (and subclasses) defined by that file.
        If a lemma is specified, then return only VerbNet class
        identifiers for classes that contain that lemma as a member.
        If a wordnetid is specified, then return only identifiers for
        classes that contain that wordnetid as a member.  If a classid
        is specified, then return only identifiers for subclasses of
        the specified VerbNet class.
        If nothing is specified, return all classids within VerbNet
        Nc                 &    g | ]\  }}|k    |S  r2   )r   cffileids      r   r   z0VerbnetCorpusReader.classids.<locals>.<listcomp>v   s"    QQQ&1aQ&[[A[[[r   c                 8    g | ]}|                     d           S IDr   r   subclasss     r   r   z0VerbnetCorpusReader.classids.<locals>.<listcomp>}   s4        T""  r   SUBCLASSES/VNSUBCLASS)r   itemsr   r   r$   r%   r    r!   )r   lemma	wordnetidr5   classidxmltrees      `  r   classidszVerbnetCorpusReader.classidsh   s     QQQQD$9$?$?$A$AQQQQ'.."))44 ll7++G  '0G H H   
 $/4466777r   c                    || j         v r|                     |          S |                     |          }|| j        v r| j        |                     |                   }|                     |          }||                    d          k    r|S |                    d          D ]}||                    d          k    r|c S  J t          d|           )a  Returns VerbNet class ElementTree

        Return an ElementTree containing the xml for the specified
        VerbNet class.

        :param fileid_or_classid: An identifier specifying which class
            should be returned.  Can be a file identifier (such as
            ``'put-9.1.xml'``), or a VerbNet class identifier (such as
            ``'put-9.1'``) or a short VerbNet class identifier (such as
            ``'9.1'``).
        r8   z.//VNSUBCLASSFzUnknown identifier )_fileidsxmllongidr   r   r%   
ValueError)r   fileid_or_classidr?   r5   treer:   s         r   r$   zVerbnetCorpusReader.vnclass   s     --88-... ++/00d+++*4;;w+?+?@F88F##D$((4..(( $_ = = ! !H(,,t"4"444' 5 !L F3DFFGGGr   c                      | j         S t          |t                    r! j                             |                   gS  fd|D             S )z
        Return a list of fileids that make up this corpus.  If
        ``vnclass_ids`` is specified, then return the fileids that make
        up the specified VerbNet class(es).
        Nc                 P    g | ]"}j                             |                   #S r2   )r   rE   )r   
vnclass_idr   s     r   r   z/VerbnetCorpusReader.fileids.<locals>.<listcomp>   s=        %dkk*&=&=>  r   )rC   r"   r#   r   rE   )r   vnclass_idss   ` r   r   zVerbnetCorpusReader.fileids   sm     = S)) 	)$++k*B*BCDD   "-   r   c           	      ^   t          |t                    r|                     |          }g }|                    d          }|D ]h}|                    |                     |          |                     |          |                     |          |                     |          d           i|S )ap  Given a VerbNet class, this method returns VerbNet frames

        The members returned are:
        1) Example
        2) Description
        3) Syntax
        4) Semantics

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        :return: frames - a list of frame dictionaries
        zFRAMES/FRAME)exampledescriptionsyntax	semantics)	r"   r#   r$   r%   append_get_example_within_frame_get_description_within_frame _get_syntactic_list_within_frame_get_semantics_within_frame)r   r$   framesvnframesvnframes        r   rW   zVerbnetCorpusReader.frames   s     gs## 	,ll7++G??>22 	 	GMM#==gFF#'#E#Eg#N#N"CCGLL!%!A!A'!J!J	     r   c                     t          |t                    r|                     |          }d |                    d          D             }|S )aA  Returns subclass ids, if any exist

        Given a VerbNet class, this method returns subclass ids (if they exist)
        in a list of strings.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        :return: list of subclasses
        c                 8    g | ]}|                     d           S r7   r   r9   s     r   r   z2VerbnetCorpusReader.subclasses.<locals>.<listcomp>   s1     
 
 
#+HLL
 
 
r   r;   )r"   r#   r$   r%   )r   r$   
subclassess      r   r\   zVerbnetCorpusReader.subclasses   sZ     gs## 	,ll7++G
 
/6?V/W/W
 
 

 r   c                    t          |t                    r|                     |          }g }|                    d          D ]J}|                    |                    d          d |                    d          D             d           K|S )ab  Returns thematic roles participating in a VerbNet class

        Members returned as part of roles are-
        1) Type
        2) Modifiers

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        :return: themroles: A list of thematic roles in the VerbNet class
        zTHEMROLES/THEMROLEtypec                 d    g | ]-}|                     d           |                     d          d.S Valuer^   valuer^   r   r   restrs     r   r   z1VerbnetCorpusReader.themroles.<locals>.<listcomp>   sH     " " "! #())G"4"4eii>O>OPP" " "r   SELRESTRS/SELRESTR)r^   	modifiers)r"   r#   r$   r%   rR   r   )r   r$   	themrolestroles       r   rh   zVerbnetCorpusReader.themroles   s     gs## 	,ll7++G	__%9:: 		 		E!IIf--" "%*]]3G%H%H" " "     r   c                 l    | j         D ]+}|                     |                     |          |           ,dS )aC  
        Initialize the indexes ``_lemma_to_class``,
        ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning
        through the corpus fileids.  This is fast if ElementTree
        uses the C implementation (<0.1 secs), but quite slow (>10 secs)
        if only the python implementation is available.
        N)rC   _index_helperrD   )r   r5   s     r   _indexzVerbnetCorpusReader._index  sD     m 	9 	9Ftxx//8888	9 	9r   c                    |                     d          }|| j        |<   || j        |                     |          <   |                    d          D ]}| j        |                     d                                       |           |                     dd                                          D ]"}| j        |                             |           #|                    d          D ]}| 	                    ||           dS )zHelper for ``_index()``r8   r   r   r*   r+   r;   N)
r   r   r   shortidr%   r   rR   r,   r   rk   )r   r@   r5   r$   r   r*   r:   s          r   rk   z!VerbnetCorpusReader._index_helper  s	   ++d##)/g&9@W 5 56oo&677 	; 	;F F!3!34;;GDDDjjr**0022 ; ;&r*11'::::;(?@@ 	1 	1Hx0000	1 	1r   c                    | j         D ]X}|dd         }|| j        |<   || j        |                     |          <   |                     |          5 }| j                            |                                          D ]}|                                }|d         d| j	        |d                  
                    |           |d                                         D ]"}| j        |         
                    |           #|d         6|| j        |d         <   |d         }|| j        |                     |          <   J d            	 ddd           n# 1 swxY w Y   ZdS )a  
        Initialize the indexes ``_lemma_to_class``,
        ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning
        through the corpus fileids.  This doesn't do proper xml parsing,
        but is good enough to find everything in the standard VerbNet
        corpus -- and it runs about 30 times faster than xml parsing
        (with the python ElementTree; only 2-3 times faster
        if ElementTree uses the C implementation).
        Nr         Fzunexpected match condition)rC   r   r   rn   open	_INDEX_REfinditerreadgroupsr   rR   r,   r   )r   r5   r$   fpmrw   r*   s          r   r   z VerbnetCorpusReader._quick_index  s    m 	C 	CFSbSkG-3D!'*=DD#DLL$9$9:6"" Cb00;; C CAXXZZFay,,VAY7>>wGGG"()//"3"3 G GB 226==gFFFFG.;A-fQi8"()IP/W0E0EFFB&BBBBCC C C C C C C C C C C C C C C		C 	Cs   C7EE	E	c                     | j                             |          r|S | j                            |          st          d|z            	 | j        |         S # t
          $ r}t          d|z            |d}~ww xY w)zReturns longid of a VerbNet class

        Given a short VerbNet class identifier (eg '37.10'), map it
        to a long id (eg 'confess-37.10').  If ``shortid`` is already a
        long id, then return it as-isvnclass identifier %r not foundN)
_LONGID_REmatch_SHORTID_RErF   r   KeyError)r   rn   es      r   rE   zVerbnetCorpusReader.longid;  s     ?  )) 	JN!''00 	J>HIII	Q*733 	Q 	Q 	Q>HIIqP	Qs   
A 
A9!A44A9c                     | j                             |          r|S | j                            |          }|r|                    d          S t	          d|z            )zReturns shortid of a VerbNet class

        Given a long VerbNet class identifier (eg 'confess-37.10'),
        map it to a short id (eg '37.10').  If ``longid`` is already a
        short id, then return it as-is.rr   r{   )r~   r}   r|   grouprF   )r   rE   ry   s      r   rn   zVerbnetCorpusReader.shortidJ  sd     !!&)) 	MO!!&)) 	I771::>GHHHr   c                     g }|                     d          D ]d}d |                     d          D             }|                    |                    d          ||                    d          dk    d           e|S )an  Returns semantics within a single frame

        A utility function to retrieve semantics within a frame in VerbNet
        Members of the semantics dictionary:
        1) Predicate value
        2) Arguments

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: semantics: semantics dictionary
        zSEMANTICS/PREDc                 d    g | ]-}|                     d           |                     d          d.S )r^   rc   )r^   rc   r   )r   args     r   r   zCVerbnetCorpusReader._get_semantics_within_frame.<locals>.<listcomp>j  sF        37773C3CDD  r   zARGS/ARGrc   bool!)predicate_value	argumentsnegated)r%   rR   r   )r   rY   semantics_within_single_framepredr   s        r   rV   z/VerbnetCorpusReader._get_semantics_within_frame\  s     )+%OO$455 	 	D <<
33  I *00'+xx'8'8!*#xx//36     -,r   c                 H    |                     d          }||j        }nd}|S )a'  Returns example within a frame

        A utility function to retrieve an example within a frame in VerbNet.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: example_text: The example sentence for this particular frame
        zEXAMPLES/EXAMPLENr+   )findtext)r   rY   example_elementexample_texts       r   rS   z-VerbnetCorpusReader._get_example_within_framew  s1     ",,'9::&*/LLLr   c                 t    |                     d          }|j        d         |                    dd          dS )ab  Returns member description within frame

        A utility function to retrieve a description of participating members
        within a frame in VerbNet.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: description: a description dictionary with members - primary and secondary
        DESCRIPTIONprimary	secondaryr+   )r   r   )r   attribr   )r   rY   description_elements      r   rT   z1VerbnetCorpusReader._get_description_within_frame  sC     &ll=99*1)<,00bAA
 
 	
r   c                 b   g }|                     d          D ]}|j        }t                      }d|j        v r|                    d          nd|d<   d |                    d          D             |d<   d |                    d          D             |d	<   |                    ||d
           |S )a[  Returns semantics within a frame

        A utility function to retrieve semantics within a frame in VerbNet.
        Members of the syntactic dictionary:
        1) POS Tag
        2) Modifiers

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: syntax_within_single_frame
        SYNTAXrc   r+   c                 d    g | ]-}|                     d           |                     d          d.S r`   r   rd   s     r   r   zHVerbnetCorpusReader._get_syntactic_list_within_frame.<locals>.<listcomp>  H     & & &  ))G,,eii6G6GHH& & &r   rf   	selrestrsc                 d    g | ]-}|                     d           |                     d          d.S r`   r   rd   s     r   r   zHVerbnetCorpusReader._get_syntactic_list_within_frame.<locals>.<listcomp>  r   r   zSYNRESTRS/SYNRESTR	synrestrs)pos_tagrg   )r   tagdictr   r   r%   rR   )r   rY   syntax_within_single_frameeltr   rg   s         r   rU   z4VerbnetCorpusReader._get_syntactic_list_within_frame  s     &("<<)) 	 	CgGI5<
5J5J!1!1!1PRIg& & [[)=>>& & &Ik"& & [[)=>>& & &Ik" '--#)<<    *)r   c                    t          |t                    r|                     |          }|                    d          dz   }||                     |d          dz   z  }||                     |d          dz   z  }|dz  }||                     |d          dz   z  }|dz  }||                     |d          z  }|S )a%  Returns pretty printed version of a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        r8   
  )indentz  Thematic roles:
    z
  Frames:
)r"   r#   r$   r   pprint_subclassespprint_memberspprint_themrolespprint_frames)r   r$   ss      r   pprintzVerbnetCorpusReader.pprint  s     gs## 	,ll7++GKK$	T##GD#99D@@	T   66==	""	T""76"::TAA	]	T777r   r+   c                     t          |t                    r|                     |          }|                     |          }|sdg}dd                    |          z   }t          j        |d||dz             S )a>  Returns pretty printed version of subclasses of VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class's subclasses.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        (none)zSubclasses:  F   r   initial_indentsubsequent_indent)r"   r#   r$   r\   jointextwrapfill)r   r$   r   r\   r   s        r   r   z%VerbnetCorpusReader.pprint_subclasses  s     gs## 	,ll7++G__W--
 	$"JSXXj111}r&FTM
 
 
 	
r   c                     t          |t                    r|                     |          }|                     |          }|sdg}dd                    |          z   }t          j        |d||dz             S )a?  Returns pretty printed version of members in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class's member verbs.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        r   z	Members: r   r   r   r   )r"   r#   r$   r'   r   r   r   )r   r$   r   membersr   s        r   r   z"VerbnetCorpusReader.pprint_members  s     gs## 	,ll7++G++g&& 	!jG#((7+++}r&FTM
 
 
 	
r   c                    t          |t                    r|                     |          }g }|                     |          D ]q}|dz   |                    d          z   }d |d         D             }|r+|d                    d                    |                    z  }|                    |           rd                    |          S )aH  Returns pretty printed version of thematic roles in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class's thematic roles.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        * r^   c                 0    g | ]}|d          |d         z   S rb   r2   )r   modifiers     r   r   z8VerbnetCorpusReader.pprint_themroles.<locals>.<listcomp>  s7        !HV$44  r   rg   [{}]r   r   )r"   r#   r$   rh   r   formatr   rR   )r   r$   r   piecesthemrolepiecerg   s          r   r   z$VerbnetCorpusReader.pprint_themroles  s     gs## 	,ll7++Gw// 	! 	!HTMHLL$8$88E  ( 5  I  <sxx	':':;;;MM%    yy   r   c                    t          |t                    r|                     |          }g }|                     |          D ]+}|                    |                     ||                     ,d                    |          S )a?  Returns pretty version of all frames in a VerbNet class

        Return a string containing a pretty-printed representation of
        the list of frames within the VerbNet class.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        r   )r"   r#   r$   rW   rR   _pprint_single_framer   )r   r$   r   r   rY   s        r   r   z!VerbnetCorpusReader.pprint_frames  s     gs## 	,ll7++G{{7++ 	F 	FGMM$33GVDDEEEEyy   r   c                     |                      ||          dz   }||                     ||dz             dz   z  }||                     ||dz             dz   z  }||dz   z  }||                     ||dz             z  }|S )a  Returns pretty printed version of a single frame in a VerbNet class

        Returns a string containing a pretty-printed representation of
        the given frame.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        r   r   z
  Syntax: z  Semantics:
r   ) _pprint_description_within_frame_pprint_example_within_frame_pprint_syntax_within_frame_pprint_semantics_within_frame)r   rY   r   frame_strings       r   r   z(VerbnetCorpusReader._pprint_single_frame  s     <<WfMMPTT99'6C<PPSWWW,,Wf|6KLLtS	
 	!111;;GVf_UUUr   c                 2    |d         r|dz   |d         z   S dS )a&  Returns pretty printed version of example within frame in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet frame example.

        :param vnframe: An ElementTree containing the xml contents of
            a Verbnet frame.
        rN   z
 Example: Nr2   )r   rY   r   s      r   r   z0VerbnetCorpusReader._pprint_example_within_frame0  s0     9 	>L(79+===	> 	>r   c                     ||d         d         z   }|d         d         r$|d                     |d         d                   z  }|S )a  Returns pretty printed version of a VerbNet frame description

        Return a string containing a pretty-printed representation of
        the given VerbNet frame description.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        rO   r   r   z ({})r   )r   rY   r   rO   s       r   r   z4VerbnetCorpusReader._pprint_description_within_frame<  sQ     w}5i@@=!+. 	O7>>'-*@*MNNNKr   c                    g }|d         D ]}|d         }g }d|d         v r/|d         d         r!|                     |d         d                    |d |d         d         |d         d         z   D             z  }|r+|d                    d	                    |                    z  }|                     |           |d	                    |          z   S )
a&  Returns pretty printed version of syntax within a frame in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet frame syntax.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        rP   r   rc   rg   c                 R    g | ]$}d                      |d         |d                   %S )z{}{}rc   r^   r   rd   s     r   r   zCVerbnetCorpusReader._pprint_syntax_within_frame.<locals>.<listcomp>Y  s>        eGneFm<<  r   r   r   r   r   )rR   r   r   )r   rY   r   r   elementr   modifier_lists          r   r   z/VerbnetCorpusReader._pprint_syntax_within_frameJ  s    x( 	! 	!GI&EM'+...7;3G3P.$$W[%9'%BCCC   K(5k*;78   M  @sxx'>'>???MM%    ((((r   c           	         g }|d         D ]T}d |d         D             }|                     |d         rdnd |d          dd	                    |           d
           Ud                    fd|D                       S )a,  Returns a pretty printed version of semantics within frame in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet frame semantics.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        rQ   c                     g | ]
}|d          S )rc   r2   )r   arguments     r   r   zFVerbnetCorpusReader._pprint_semantics_within_frame.<locals>.<listcomp>q  s    RRRx'*RRRr   r   r      ¬r+   r   (z, )r   c              3   &   K   | ]} d | V  dS )r   Nr2   )r   r   r   s     r   r-   zEVerbnetCorpusReader._pprint_semantics_within_frame.<locals>.<genexpr>u  s1      BB%F--e--BBBBBBr   )rR   r   )r   rY   r   r   	predicater   s     `   r   r   z2VerbnetCorpusReader._pprint_semantics_within_framef  s      - 	 	IRR9[;QRRRIMM$Y/744RoCT9UooX\XaXabkXlXlooo    yyBBBB6BBBBBBr   )Fr   )NNNN)r+   )%__name__
__module____qualname____doc__r	   recompiler|   r~   rt   r'   r/   rA   r$   r   rW   r\   rh   rl   rk   r   rE   rn   rV   rS   rT   rU   r   r   r   r   r   r   r   r   r   r   r2   r   r   r   r      sg           . 677JB"*]++K2
W I!X X X X   &8 8 8 88H H HB      8  $  >	9 	9 	9
1 
1 
1C C CDQ Q QI I I$- - -6   
 
 
 * * *D  *
 
 
 
(
 
 
 
(! ! ! !0! ! ! !    $
> 
> 
> 
>   ) ) ) )8C C C C C Cr   r   )r   r   r   collectionsr   nltk.corpus.reader.xmldocsr   r   r2   r   r   <module>r      s     
			  # # # # # # 6 6 6 6 6 6_	C _	C _	C _	C _	C/ _	C _	C _	C _	C _	Cr   