
    '[f)                        d Z ddlZddlZddlT ddlmZ ddlmZ ddlT ddl	m
Z
  G d de          Z G d	 d
e          Z G d de          Zi dddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.i d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdNi dPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqi drdsdtdudvdwdxdydzd{d|d}d~dddddddddddddddddddddi ddddddddddddddddddddddddddddddddddddddddddddddddddŜZdS )a  
Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old
English Prose (YCOE), a 1.5 million word syntactically-annotated
corpus of Old English prose texts. The corpus is distributed by the
Oxford Text Archive: http://www.ota.ahds.ac.uk/ It is not included
with NLTK.

The YCOE corpus is divided into 100 files, each representing
an Old English prose text. Tags used within each text complies
to the YCOE standard: https://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm
    N)*)BracketParseCorpusReader)TaggedCorpusReader)RegexpTokenizerc                   h    e Zd ZdZddZddZddZd ZddZdd	Z	dd
Z
ddZddZddZddZdS )YCOECorpusReaderz
    Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old
    English Prose (YCOE), a 1.5 million word syntactically-annotated
    corpus of Old English prose texts.
    utf8c                 \   t                               | |g |           t          | j                            d          dd|          | _        t          | j                            d          dd          | _        d | j                                        D             }d | j                                        D             |k    rt          d	          t          d
 |D             d |D             z             }t                               | |||           t          |          | _        d S )Npsdz.*.psd)encodingpos.posc                 "    h | ]}|d d         S N .0fs     K/var/www/piapp/venv/lib/python3.11/site-packages/nltk/corpus/reader/ycoe.py	<setcomp>z,YCOECorpusReader.__init__.<locals>.<setcomp>.   s     @@@QssV@@@    c                 "    h | ]}|d d         S r   r   r   s     r   r   z,YCOECorpusReader.__init__.<locals>.<setcomp>/   s     777qAcrcF777r   z5Items in "psd" and "pos" subdirectories do not match.c                     g | ]}d |z  S z%s.psdr   r   docs     r   
<listcomp>z-YCOECorpusReader.__init__.<locals>.<listcomp>3   s    111X^111r   c                     g | ]}d |z  S z%s.posr   r   s     r   r   z-YCOECorpusReader.__init__.<locals>.<listcomp>4   s    333#x#~333r   )CorpusReader__init__YCOEParseCorpusReaderrootjoin_psd_readerYCOETaggedCorpusReader_pos_readerfileids
ValueErrorsorted
_documents)selfr%   r   	documentsr*   s        r   r#   zYCOECorpusReader.__init__%   s/   dD"h7770INN5!!4(
 
 
 2$)..2G2GvVV A@T%5%=%=%?%?@@@	77D,44667779DDWXXX11y111333334
 
 	dD'8<<< ++r   Nc                     || j         S t          |t                    r|g}|D ]}|| j        vrt	          d|z            t          d |D                       S )z
        Return a list of document identifiers for all documents in
        this corpus, or for the documents with the given file(s) if
        specified.
        NzFile id %s not foundc                 "    h | ]}|d d         S r   r   r   s     r   r   z-YCOECorpusReader.documents.<locals>.<setcomp>G   s     ///!q"v///r   )r-   
isinstancestr_fileidsKeyErrorr,   )r.   r*   r   s      r   r/   zYCOECorpusReader.documents9   s     ??"gs## 	 iG 	A 	AA%%5?@@@ & //w///000r   c                     || j         S t          |t                    r|g}t          t	          d |D             d |D             z                       S )z
        Return a list of file identifiers for the files that make up
        this corpus, or that store the given document(s) if specified.
        Nc                     g | ]}d |z  S r!   r   r   s     r   r   z,YCOECorpusReader.fileids.<locals>.<listcomp>T   s    555CC555r   c                     g | ]}d |z  S r   r   r   s     r   r   z,YCOECorpusReader.fileids.<locals>.<listcomp>U   s    777c8c>777r   )r4   r2   r3   r,   setr.   r/   s     r   r*   zYCOECorpusReader.fileidsI   sq    
 = 	3'' 	$"I55955577Y7778 
 
 	
r   c                     || j         }nSt          |t                    r|g}|D ]8}|| j         vr-|dd         dv rt          d          t          d|z            9fd|D             S )z
        Helper that selects the appropriate fileids for a given set of
        documents from a given subcorpus (pos or psd).
        Nr   )r   r   zvExpected a document identifier, not a file identifier.  (Use corpus.documents() to get a list of document identifiers.z Document identifier %s not foundc                     g | ]	}| d  
S ).r   )r   d	subcorpuss     r   r   z0YCOECorpusReader._getfileids.<locals>.<listcomp>m   s'    666q1""y""666r   )r-   r2   r3   r+   )r.   r/   r?   documents     ` r   _getfileidszYCOECorpusReader._getfileidsY   s    
 II)S)) (&K	% 	X 	X4?22}(888(>   ))Kh)VWWW 3 7666I6666r   c                 ^    | j                             |                     |d                    S Nr   )r)   wordsrA   r:   s     r   rD   zYCOECorpusReader.wordsp   )    %%d&6&6y%&H&HIIIr   c                 ^    | j                             |                     |d                    S rC   )r)   sentsrA   r:   s     r   rG   zYCOECorpusReader.sentss   rE   r   c                 ^    | j                             |                     |d                    S rC   )r)   parasrA   r:   s     r   rI   zYCOECorpusReader.parasv   rE   r   c                 ^    | j                             |                     |d                    S rC   )r)   tagged_wordsrA   r:   s     r   rK   zYCOECorpusReader.tagged_wordsy   )    ,,T-=-=i-O-OPPPr   c                 ^    | j                             |                     |d                    S rC   )r)   tagged_sentsrA   r:   s     r   rN   zYCOECorpusReader.tagged_sents|   rL   r   c                 ^    | j                             |                     |d                    S rC   )r)   tagged_parasrA   r:   s     r   rP   zYCOECorpusReader.tagged_paras   rL   r   c                 ^    | j                             |                     |d                    S )Nr   )r'   parsed_sentsrA   r:   s     r   rR   zYCOECorpusReader.parsed_sents   rL   r   r	   )N)__name__
__module____qualname____doc__r#   r/   r*   rA   rD   rG   rI   rK   rN   rP   rR   r   r   r   r   r      s        , , , ,(1 1 1 1 
 
 
 
 7 7 7.J J J JJ J J JJ J J JQ Q Q QQ Q Q QQ Q Q QQ Q Q Q Q Qr   r   c                       e Zd ZdZd ZdS )r$   zrSpecialized version of the standard bracket parse corpus reader
    that strips out (CODE ...) and (ID ...) nodes.c                     t          j        dd|          }t          j        d|          rd S t          j        | |          S )Nz(?u)\((CODE|ID)[^\)]*\) z\s*\(\s*\)\s*$)resubmatchr   _parse)r.   ts     r   r^   zYCOEParseCorpusReader._parse   sC    F-r1558%q)) 	4'.tQ777r   N)rT   rU   rV   rW   r^   r   r   r   r$   r$      s-        6 68 8 8 8 8r   r$   c                       e Zd ZddZdS )r(   r	   c                 ^    d}t          |d          }t          j        | ||d|           d S )Nz+(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*T)gaps_)sepsent_tokenizer)r   r   r#   )r.   r%   itemsr   gaps_rere   s         r   r#   zYCOETaggedCorpusReader.__init__   sH    @(t<<<#$3~	
 	
 	
 	
 	
 	
r   NrS   )rT   rU   rV   r#   r   r   r   r(   r(      s(        
 
 
 
 
 
r   r(   zcoadrian.o34zAdrian and Ritheuszcoaelhom.o3u   Ælfric, Supplemental Homilieszcoaelive.o3u   Ælfric's Lives of SaintscoalcuinzAlcuin De virtutibus et vitiisz
coalex.o23zAlexander's Letter to Aristotlezcoapollo.o3zApollonius of Tyrecoaugust	Augustinez	cobede.o2z$Bede's History of the English Churchzcobenrul.o3zBenedictine Rulezcoblick.o23zBlickling Homiliesz
coboeth.o2z#Boethius' Consolation of Philosophyzcobyrhtf.o3zByrhtferth's Manual	cocanedgDzCanons of Edgar (D)	cocanedgXzCanons of Edgar (X)zcocathom1.o3u   Ælfric's Catholic Homilies Izcocathom2.o3u   Ælfric's Catholic Homilies IIz
cochad.o24z
Saint ChadcochdrulzChrodegang of Metz, RulecochristophzSaint ChristopherzcochronA.o23zAnglo-Saxon Chronicle AcochronCzAnglo-Saxon Chronicle CcochronDzAnglo-Saxon Chronicle DzcochronE.o34zAnglo-Saxon Chronicle Ez	cocura.o2zCura PastoraliscocuraCzCura Pastoralis (Cotton)zcodicts.o34zDicts of Catoz
codocu1.o1zDocuments 1 (O1)zcodocu2.o12zDocuments 2 (O1/O2)z
codocu2.o2zDocuments 2 (O2)zcodocu3.o23zDocuments 3 (O2/O3)z
codocu3.o3zDocuments 3 (O3)zcodocu4.o24zDocuments 4 (O2/O4)coeluc1z Honorius of Autun, Elucidarium 1coeluc2zcoepigen.o3u   Ælfric's Epilogue to GenesiscoeuphrzSaint Euphrosynecoeustz Saint Eustace and his companions	coexodusPz
Exodus (P)	cogenesiCzGenesis (C)zcogregdC.o24zGregory's Dialogues (C)zcogregdH.o23zGregory's Dialogues (H)coherbarzPseudo-Apuleius, HerbariumzcoinspolD.o34z"Wulfstan's Institute of Polity (D)	coinspolXz"Wulfstan's Institute of Polity (X)cojameszSaint Jameszcolacnu.o23Lacnungaz
colaece.o2	Leechdomszcolaw1cn.o3zLaws, Cnut Izcolaw2cn.o3zLaws, Cnut IIzcolaw5atr.o3u   Laws, Æthelred Vzcolaw6atr.o3u   Laws, Æthelred VIz
colawaf.o2zLaws, Alfredzcolawafint.o2zAlfred's Introduction to Lawszcolawger.o34zLaws, Gerefazcolawine.ox2z	Laws, Inezcolawnorthu.o3zNorthumbra Preosta Laguzcolawwllad.o4zLaws, William I, Ladzcoleofri.o4Leofriczcolsigef.o3u   Ælfric's Letter to Sigefyrth	colsigewBu!   Ælfric's Letter to Sigeweard (B)zcolsigewZ.o34u!   Ælfric's Letter to Sigeweard (Z)colwgeatu   Ælfric's Letter to Wulfgeat	colwsigeTu    Ælfric's Letter to Wulfsige (T)zcolwsigeXa.o34u!   Ælfric's Letter to Wulfsige (Xa)zcolwstan1.o3u   Ælfric's Letter to Wulfstan Izcolwstan2.o3u   Ælfric's Letter to Wulfstan IIzcomargaC.o34zSaint Margaret (C)comargaTzSaint Margaret (T)comart1zMartyrology, Icomart2zMartyrology, IIzcomart3.o23zMartyrology, IIIzcomarvel.o23zMarvels of the EastcomaryzMary of Egyptconeotz
Saint NeotconicodAzGospel of Nicodemus (A)conicodCzGospel of Nicodemus (C)conicodDzGospel of Nicodemus (D)conicodEzGospel of Nicodemus (E)zcoorosiu.o2Orosiusz
cootest.o3
Heptateuchzcoprefcath1.o3u(   Ælfric's Preface to Catholic Homilies Izcoprefcath2.o3u)   Ælfric's Preface to Catholic Homilies IIzcoprefcura.o2zPreface to the Cura Pastoraliszcoprefgen.o3u   Ælfric's Preface to Genesiszcopreflives.o3u$   Ælfric's Preface to Lives of Saintsz"Preface to Augustine's Soliloquiesz*Pseudo-Apuleius, Medicina de quadrupedibuszHistory of the Holy Rood-TreezSeven SleeperszSt. Augustine's SoliloquieszSolomon and Saturn IzSolomon and Saturn IIu   Ælfric's De Temporibus AnnizVercelli HomilieszVercelli Homilies (E)zVercelli Homilies (L)zSaint Vincent (Bodley 343)zVindicta SalvatoriszWest-Saxon GospelszWulfstan's Homilies)coprefsolilozcoquadru.o23corood	cosevenslcosolilozcosolsat1.o4	cosolsat2z
cotempo.o3coverhom	coverhomE	coverhomLcovinceBcovinsalzcowsgosp.o3z
cowulf.o34)rW   osr[   nltk.corpus.reader.api nltk.corpus.reader.bracket_parser   nltk.corpus.reader.taggedr   nltk.corpus.reader.utilnltk.tokenizer   r"   r   r$   r(   r/   r   r   r   <module>r      s}  
 
 
			 				 $ $ $ $ E E E E E E 8 8 8 8 8 8 % % % % ) ) ) ) ) )eQ eQ eQ eQ eQ| eQ eQ eQP8 8 8 8 84 8 8 8
 
 
 
 
/ 
 
 
e(e3e .e 0	e
 3e 'e e 7e %e 'e 7e (e &e &e 3e  4!e" ,#e e$ *%e& &'e( -)e* )+e, )-e. -/e0 "1e2 )3e4 ?5e6 $7e8 (9e: $;e< (=e> $?e@ (AeB 1CeD 1Ee e eF 2GeH !IeJ 0KeL MeN OeP -QeR -SeT ,UeV 9WeX 5YeZ }[e\ :]e^ +_e` >aeb ?ced 'eef (ge e eh .iej 4kel Nmen Koep /qer +set 9uev 2wex 4yez 8{e| .}e~ 3e@ 9AeB 4CeD 5EeF (GeH $Ie e eJ KeL  MeN %OeP )QeR oSeT lUeV )WeX )YeZ )[e\ )]e^ 9_e` ,aeb @ced Aeef 5geh 2iej <ke el 9@-!-*(0#((,%''Ie e e			r   