
    '[f                     f    d dl Z d dlT d dlT d dlmZmZ  G d de          Z G d de          Z	dS )    N)*)map_tag	str2tuplec                       e Zd ZdZd Zd ZdS )SwitchboardTurnaE  
    A specialized list object used to encode switchboard utterances.
    The elements of the list are the words in the utterance; and two
    attributes, ``speaker`` and ``id``, are provided to retrieve the
    spearker identifier and utterance id.  Note that utterance ids
    are only unique within a given discourse.
    c                 r    t                               | |           || _        t          |          | _        d S N)list__init__speakerintid)selfwordsr   r   s       R/var/www/piapp/venv/lib/python3.11/site-packages/nltk/corpus/reader/switchboard.pyr   zSwitchboardTurn.__init__   s.    dE"""b''    c                     t          |           dk    rd}nPt          | d         t                    r d                    d | D                       }nd                    |           }d| j         d| j         d|dS )	Nr     c              3       K   | ]	}d |z  V  
dS )z%s/%sN ).0ws     r   	<genexpr>z+SwitchboardTurn.__repr__.<locals>.<genexpr>    s&      66AGaK666666r   <.z: >)len
isinstancetuplejoinr   r   )r   texts     r   __repr__zSwitchboardTurn.__repr__   s    t99>>DDQ'' 	"886666666DD88D>>D64<66$'66T6666r   N)__name__
__module____qualname____doc__r   r#   r   r   r   r   r      s<           
7 7 7 7 7r   r   c                       e Zd ZdgZddZd ZddZd ZddZd Z	dd
Z
d ZddZd ZddZd ZddZ ej        d          ZdZddZdS )SwitchboardCorpusReadertaggedNc                 V    t                               | || j                   || _        d S r	   )CorpusReaderr   _FILES_tagset)r   roottagsets      r   r   z SwitchboardCorpusReader.__init__+   s'    dD$+666r   c                 R    t          |                     d          | j                  S Nr*   )StreamBackedCorpusViewabspath_words_block_readerr   s    r   r   zSwitchboardCorpusReader.words/   !    %dll8&<&<d>VWWWr   c                 X      fd}t                               d          |          S )Nc                 0                         |           S r	   )_tagged_words_block_readerstreamr   r0   s    r   tagged_words_block_readerzGSwitchboardCorpusReader.tagged_words.<locals>.tagged_words_block_reader3       2266BBBr   r*   r3   r4   )r   r0   r=   s   `` r   tagged_wordsz$SwitchboardCorpusReader.tagged_words2   G    	C 	C 	C 	C 	C 	C &dll8&<&<>WXXXr   c                 R    t          |                     d          | j                  S r2   )r3   r4   _turns_block_readerr6   s    r   turnszSwitchboardCorpusReader.turns8   r7   r   c                 X      fd}t                               d          |          S )Nc                 0                         |           S r	   )_tagged_turns_block_readerr;   s    r   tagged_turns_block_readerzGSwitchboardCorpusReader.tagged_turns.<locals>.tagged_turns_block_reader<   r>   r   r*   r?   )r   r0   rH   s   `` r   tagged_turnsz$SwitchboardCorpusReader.tagged_turns;   rA   r   c                 R    t          |                     d          | j                  S r2   )r3   r4   _discourses_block_readerr6   s    r   
discoursesz"SwitchboardCorpusReader.discoursesA   s(    %LL""D$A
 
 	
r   Fc                 X      fd}t                               d          |          S )Nc                 0                         |           S r	   _tagged_discourses_block_readerr;   s    r   tagged_discourses_block_readerzQSwitchboardCorpusReader.tagged_discourses.<locals>.tagged_discourses_block_readerG   s    77GGGr   r*   r?   )r   r0   rQ   s   `` r   tagged_discoursesz)SwitchboardCorpusReader.tagged_discoursesF   sN    	H 	H 	H 	H 	H 	H &LL""$B
 
 	
r   c                 <      fdt          |          D             gS )Nc                     g | ]E}|                     d           D ]-}|                                                    |d          .FS )
F)include_tagsplitstrip_parse_utterance)r   bur   s      r   
<listcomp>zDSwitchboardCorpusReader._discourses_block_reader.<locals>.<listcomp>Q   sk         7799	%%aU%;;   r   read_blankline_blockr   r<   s   ` r   rK   z0SwitchboardCorpusReader._discourses_block_readerN   s;       -f55  
 	
r   c                 @      fdt          |          D             gS )Nc                     g | ]F}|                     d           D ].}|                                                    |d          /GS )rU   T)rV   r0   rW   )r   r[   r\   r   r0   s      r   r]   zKSwitchboardCorpusReader._tagged_discourses_block_reader.<locals>.<listcomp>\   sm         7799	%%aT&%II   r   r^   r   r<   r0   s   ` `r   rP   z7SwitchboardCorpusReader._tagged_discourses_block_readerY   sA        -f55  
 	
r   c                 8    |                      |          d         S Nr   )rK   r`   s     r   rC   z+SwitchboardCorpusReader._turns_block_readerd   s    ,,V44Q77r   c                 :    |                      ||          d         S re   rO   rc   s      r   rG   z2SwitchboardCorpusReader._tagged_turns_block_readerg   s    33FFCCAFFr   c                 T    t          |                     |          d         g           S re   )sumrK   r`   s     r   r5   z+SwitchboardCorpusReader._words_block_readerj   s%    40088;R@@@r   c                 V    t          |                     ||          d         g           S re   )rh   rP   rc   s      r   r:   z2SwitchboardCorpusReader._tagged_words_block_readerm   s'    477GGJBOOOr   z(\w+)\.(\d+)\:\s*(.*)/c                 J     j                             |          }|t          d|z            |                                \  }}} fd|                                D             }|sd |D             }nr j        k    r fd|D             }t          |||          S )NzBad utterance %rc                 :    g | ]}t          |j                  S r   )r   _SEP)r   sr   s     r   r]   z<SwitchboardCorpusReader._parse_utterance.<locals>.<listcomp>x   s%    ???Q1di((???r   c                     g | ]\  }}|S r   r   )r   r   ts      r   r]   z<SwitchboardCorpusReader._parse_utterance.<locals>.<listcomp>z   s    +++6AqQ+++r   c                 F    g | ]\  }}|t          j        |          fS r   )r   r.   )r   r   rp   r   r0   s      r   r]   z<SwitchboardCorpusReader._parse_utterance.<locals>.<listcomp>|   s0    OOOv1avq99:OOOr   )_UTTERANCE_REmatch
ValueErrorgroupsrX   r.   r   )	r   	utterancerV   r0   mr   r   r"   r   s	   `  `     r   rZ   z(SwitchboardCorpusReader._parse_utterances   s    $$Y//9/);<<<HHJJT????$**,,??? 	P++U+++EE 	P$,..OOOOOOOOEugr222r   r	   )F)r$   r%   r&   r-   r   r   r@   rD   rI   rL   rR   rK   rP   rC   rG   r5   r:   recompilerr   rm   rZ   r   r   r   r)   r)   &   sG       ZF   X X XY Y Y YX X XY Y Y Y
 
 


 
 
 
	
 	
 	
	
 	
 	
 	
8 8 8G G G GA A AP P P P BJ788MD
3 
3 
3 
3 
3 
3r   r)   )
rx   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.tagr   r   r
   r   r,   r)   r   r   r   <module>r}      s    
			 $ $ $ $ % % % % ' ' ' ' ' ' ' '7 7 7 7 7d 7 7 70W3 W3 W3 W3 W3l W3 W3 W3 W3 W3r   