
    zIf.                     >   d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZ d Zd Z G d	 d
          Z G d de          Z G d de          Z edd          Z edd          Z edd          Z G d de          Z G d dee          ZdS )    )
namedtuple)partialwraps)CategorizedCorpusReader)PlaintextCorpusReader)concatread_blankline_block)blankline_tokenizesent_tokenizeword_tokenizec                 <     t                      fd            }|S )z
    A decorator that allows a function to be called with
    a single string of comma-separated values which become
    individual function arguments.
    c                     t                      }| D ]}t          |t                    r3|                    d |                    d          D                        Jt          |t                     r#|                    t          |                     |                    |           |                                D ]<\  }}t          |t                    r"d |                    d          D             ||<   = |i |S )Nc                 6    h | ]}|                                 S  strip.0parts     O/var/www/piapp/venv/lib/python3.11/site-packages/nltk/corpus/reader/markdown.py	<setcomp>z?comma_separated_string_args.<locals>.wrapper.<locals>.<setcomp>   s     FFFtdjjllFFF    ,c                 6    h | ]}|                                 S r   r   r   s     r   r   z?comma_separated_string_args.<locals>.wrapper.<locals>.<setcomp>   s     JJJ

JJJr   )list
isinstancestrappendsplitsetitems)argskwargs_argsargnamevaluefuncs         r   wrapperz,comma_separated_string_args.<locals>.wrapper   s    	" 	"C#s## "FFsyy~~FFFGGGGC&& "SXX&&&&S!!!!!<<>> 	K 	KKD%%%% KJJS9I9IJJJttU%f%%%r   )r   )r(   r)   s   ` r   comma_separated_string_argsr*   
   s5     4[[& & & & [& Nr   c                 `    t          |           }|r|                    |d                   gS |S Nr   )r	   render)streamparserblocks      r   read_parse_blankline_blockr1   #   s5     ((E )eAh''((Lr   c                   x    e Zd Zd Zd Zd Zed             Zed             Zed             Z	ed             Z
dS )	MarkdownBlockc                 "    || _         d| _        d S )N   )contenttruncate_at)selfr6   s     r   __init__zMarkdownBlock.__init__+   s    r   c                 Z    | j         j         dt          t          |                      dS )Nz	(content=))	__class____name__reprr   r8   s    r   __repr__zMarkdownBlock.__repr__/   s*    .)FFDTOOFFFFr   c                 p    | j         d | j                  t          | j                   | j        k    rdnd S )Nz... )r6   r7   lenr?   s    r   __str__zMarkdownBlock.__str__2   sJ    |-T--. GDL))D,<<<uu"G G	
r   c                     | j         S Nr6   r?   s    r   rawzMarkdownBlock.raw8   s
    |r   c                 *    t          | j                  S rF   )r   r6   r?   s    r   wordszMarkdownBlock.words<   s    T\***r   c                 >    d t          | j                  D             S )Nc                 ,    g | ]}t          |          S r   r   r   sents     r   
<listcomp>z'MarkdownBlock.sents.<locals>.<listcomp>B   s     LLLd##LLLr   )r   r6   r?   s    r   sentszMarkdownBlock.sents@   s     LLdl0K0KLLLLr   c                 >    d t          | j                  D             S )Nc                 @    g | ]}d  t          |          D             S )c                 ,    g | ]}t          |          S r   rM   rN   s     r   rP   z2MarkdownBlock.paras.<locals>.<listcomp>.<listcomp>G   s     AAAT]4  AAAr   )r   r   paras     r   rP   z'MarkdownBlock.paras.<locals>.<listcomp>F   s?     
 
 
 BA]4-@-@AAA
 
 
r   r
   r6   r?   s    r   paraszMarkdownBlock.parasD   -    
 
*4<88
 
 
 	
r   N)r=   
__module____qualname__r9   r@   rD   propertyrH   rJ   rQ   rX   r   r   r   r3   r3   *   s          G G G
 
 
   X + + X+ M M XM 
 
 X
 
 
r   r3   c                   `     e Zd Z fdZed             Zed             Zed             Z xZS )	CodeBlockc                 B    || _          t                      j        |  d S rF   )languagesuperr9   )r8   r`   r"   r<   s      r   r9   zCodeBlock.__init__M   s$     $r   c                 H    d | j                                         D             S )Nc                 ,    g | ]}t          |          S r   rM   r   lines     r   rP   z#CodeBlock.sents.<locals>.<listcomp>S   s     JJJd##JJJr   r6   
splitlinesr?   s    r   rQ   zCodeBlock.sentsQ   s$    JJ0G0G0I0IJJJJr   c                 4    | j                                         S rF   rf   r?   s    r   lineszCodeBlock.linesU   s    |&&(((r   c                 >    d t          | j                  D             S )Nc                 J    g | ] }d  |                                 D             !S )c                 ,    g | ]}t          |          S r   rM   rd   s     r   rP   z.CodeBlock.paras.<locals>.<listcomp>.<listcomp>\   s     ???T]4  ???r   )rg   rU   s     r   rP   z#CodeBlock.paras.<locals>.<listcomp>[   sA     
 
 
 @?T__->->???
 
 
r   rW   r?   s    r   rX   zCodeBlock.parasY   rY   r   )	r=   rZ   r[   r9   r\   rQ   ri   rX   __classcell__r<   s   @r   r^   r^   L   s                  K K XK ) ) X) 
 
 X
 
 
 
 
r   r^   c                        e Zd Z fdZ xZS )MarkdownSectionc                 P    || _         || _         t                      j        |  d S rF   )headinglevelra   r9   )r8   rr   rs   r"   r<   s       r   r9   zMarkdownSection.__init__b   s+    
$r   )r=   rZ   r[   r9   rm   rn   s   @r   rp   rp   a   s8                         r   rp   Imagezlabel, src, titleLinkzlabel, href, titleListzis_ordered, itemsc                   *     e Zd Zdd fd
Zd Z xZS )MarkdownCorpusReaderNr/   c                0   ddl m} ddlm} ddlm} || _        | j        , |d|          | _        | j                            |           |                    dt          t          | j                              t                      j        |i | d S )	Nr   )
MarkdownIt)RendererPlain)front_matter_plugin
commonmark)renderer_clspara_block_readerry   )markdown_itr{   mdit_plain.rendererr|   mdit_py_plugins.front_matterr}   r/   use
setdefaultr   r1   ra   r9   )r8   r/   r"   r#   r{   r|   r}   r<   s          r   r9   zMarkdownCorpusReader.__init__n   s    ******555555DDDDDD;$*\NNNDKKOO/000)CDK!X!X!X	
 	
 	
 	$)&)))))r   c                     t                      }|                     |          D ]/}|                    | j                            |                     0|S rF   )r   _para_block_readerextend_word_tokenizertokenize)r8   r.   rJ   rV   s       r   _read_word_blockz%MarkdownCorpusReader._read_word_block~   sS    ++F33 	> 	>DLL-66t<<====r   )r=   rZ   r[   r9   r   rm   rn   s   @r   rx   rx   m   sT        %) * * * * * * *       r   rx   c                       e Zd ZdZdddZed fd	            Zed fd	            Zed fd	            Zed fd		            Z	ed fd
	            Z
ed fd	            Zd Zd Zedd            Zd Zedd            Zd Zedd            Zd Zedd            Zd Zedd            Zd Zedd            Zd Zedd            Z xZS )CategorizedMarkdownCorpusReadera  
    A reader for markdown corpora whose documents are divided into
    categories based on their file identifiers.

    Based on nltk.corpus.reader.plaintext.CategorizedPlaintextCorpusReader:
    https://www.nltk.org/_modules/nltk/corpus/reader/api.html#CategorizedCorpusReader
    tags)	cat_fieldc                j   g d}t          fd|D                       st                      d<   t          j        |            t	          j        | g|R i  | j        L| j        sG| j        D ]A}|                     |          }|r$|d                             |g           | j        |<   >dS dS dS )a  
        Initialize the corpus reader. Categorization arguments
        (``cat_pattern``, ``cat_map``, and ``cat_file``) are passed to
        the ``CategorizedCorpusReader`` constructor.  The remaining arguments
        are passed to the ``MarkdownCorpusReader`` constructor.
        )cat_patterncat_mapcat_filec              3       K   | ]}|v V  	d S rF   r   )r   r%   r#   s     r   	<genexpr>z;CategorizedMarkdownCorpusReader.__init__.<locals>.<genexpr>   s'      55S3&=555555r   r   Nr   )	anydictr   r9   rx   _map_fileidsmetadataget)r8   r   r"   r#   cat_argsfile_idr   s      `   r   r9   z(CategorizedMarkdownCorpusReader.__init__   s     :995555H55555 	' !%F9(v666%d<T<<<V<<< 9  = H H==11 H)1!B)G)GDIg&	 !   H Hr   Nc                 F    t                                          |          S rF   )ra   
categories)r8   fileidsr<   s     r   r   z*CategorizedMarkdownCorpusReader.categories   s    ww!!'***r   c                 X    || j         S t                                          |          S rF   )r   ra   r   )r8   r   r<   s     r   r   z'CategorizedMarkdownCorpusReader.fileids   s'    = wwz***r   c                 n    t                                          |                     ||                    S rF   )ra   rH   _resolver8   r   r   r<   s      r   rH   z#CategorizedMarkdownCorpusReader.raw   s'    ww{{4==*==>>>r   c                 n    t                                          |                     ||                    S rF   )ra   rJ   r   r   s      r   rJ   z%CategorizedMarkdownCorpusReader.words   '    ww}}T]]7J??@@@r   c                 n    t                                          |                     ||                    S rF   )ra   rQ   r   r   s      r   rQ   z%CategorizedMarkdownCorpusReader.sents   r   r   c                 n    t                                          |                     ||                    S rF   )ra   rX   r   r   s      r   rX   z%CategorizedMarkdownCorpusReader.paras   r   r   c           	           t           fd                                          ||          d          D                       S )Nc                 F    g | ]\  }}                     ||           S ))encoding)
CorpusView)r   pathencreaderr8   s      r   rP   zECategorizedMarkdownCorpusReader.concatenated_view.<locals>.<listcomp>   s?       T3 fs;;  r   T)include_encoding)r   abspathsr   )r8   r   r   r   s   ``  r   concatenated_viewz1CategorizedMarkdownCorpusReader.concatenated_view   sg        #'==MM':66 $1 $ $  
 
 	
r   c                     ddl m fd| j                            |                                          D             S )Nr   )	safe_loadc                 F    g | ]}|j         d k     |j                  S )front_matter)typer6   )r   tr   s     r   rP   zCCategorizedMarkdownCorpusReader.metadata_reader.<locals>.<listcomp>   s<     
 
 
v'' Iai  '''r   )yamlr   r/   parseread)r8   r.   r   s     @r   metadata_readerz/CategorizedMarkdownCorpusReader.metadata_reader   sX    """"""
 
 
 
[&&v{{}}55
 
 
 	
r   c                 :    |                      | j        ||          S rF   )r   r   r8   r   r   s      r   r   z(CategorizedMarkdownCorpusReader.metadata   s    %%d&:GZPPPr   c                      j                             |                                          }t          d |          }t          d |          }t	                      }t          ||          D ]P\  }}|                    |          }|                    ||          }	|                    |||	dz                       Q fd|D             S )Nc                 .    | j         dk    o
| j        dk    S )Nr   blockquote_openrs   r   r   s    r   <lambda>zCCategorizedMarkdownCorpusReader.blockquote_reader.<locals>.<lambda>   s    aglBqv1B'B r   c                 .    | j         dk    o
| j        dk    S )Nr   blockquote_closer   r   s    r   r   zCCategorizedMarkdownCorpusReader.blockquote_reader.<locals>.<lambda>   s    aglCqv1C'C r      c           	          g | ];}t          j        j                            |j        j        d                     <S )Nenv)r3   r/   rendererr-   optionsr   r0   r8   s     r   rP   zECategorizedMarkdownCorpusReader.blockquote_reader.<locals>.<listcomp>   sW     
 
 
  $++E4;3FD+QQ 
 
 
r   r/   r   r   filterr   zipindexr   )
r8   r.   tokensopening_tokensclosing_tokensblockquotesocopening_indexclosing_indexs
   `         r   blockquote_readerz1CategorizedMarkdownCorpusReader.blockquote_reader   s    ""6;;==11BBF
 
  CCV
 
 ff77 	J 	JDAq"LLOOM"LLM::Mvmma6G&GHIIII
 
 
 
 %	
 
 
 	
r   c                 :    |                      | j        ||          S rF   )r   r   r   s      r   r   z+CategorizedMarkdownCorpusReader.blockquotes       %%d&<gzRRRr   c                 n    d | j                             |                                          D             S )Nc                 j    g | ]0}|j         d k    |j        dv t          |j        |j                  1S )r   )fence
code_block)rs   r   r^   infor6   r   r   s     r   rP   zECategorizedMarkdownCorpusReader.code_block_reader.<locals>.<listcomp>   sS     
 
 

 w!||*A A A 	 
 !B A Ar   )r/   r   r   r8   r.   s     r   code_block_readerz1CategorizedMarkdownCorpusReader.code_block_reader   s=    
 

 [&&v{{}}55
 
 
 	
r   c                 :    |                      | j        ||          S rF   )r   r   r   s      r   code_blocksz+CategorizedMarkdownCorpusReader.code_blocks   r   r   c                     d t          d | j                            |                                                    D             S )Nc           
          g | ]S}|j         D ]I}|j        d k    t          |j        |                    d          |                    d                    JTS )imagesrctitle)childrenr   rt   r6   attrGet)r   inline_tokenchild_tokens      r   rP   z@CategorizedMarkdownCorpusReader.image_reader.<locals>.<listcomp>  s     
 
 
   ,4
 
 7** ###E**##G,,  +***r   c                     | j         dk    S Ninliner   r   s    r   r   z>CategorizedMarkdownCorpusReader.image_reader.<locals>.<lambda>	      !&H, r   r   r/   r   r   r   s     r   image_readerz,CategorizedMarkdownCorpusReader.image_reader  O    
 
 !',,dk.?.?.N.N! !
 
 
 	
r   c                 :    |                      | j        ||          S rF   )r   r   r   s      r   imagesz&CategorizedMarkdownCorpusReader.images  s    %%d&7*MMMr   c                     d t          d | j                            |                                                    D             S )Nc           
          g | ]q}t          |j                  D ]Z\  }}|j        d k    t          |j        |dz            j        |                    d          |                    d                    [rS )	link_openr   hrefr   )	enumerater   r   ru   r6   r   )r   r   ir   s       r   rP   z?CategorizedMarkdownCorpusReader.link_reader.<locals>.<listcomp>  s     
 
 
  #,L,A"B"B
 
 ;;.. %a!e,4##F++##G,,  /...r   c                     | j         dk    S r   r   r   s    r   r   z=CategorizedMarkdownCorpusReader.link_reader.<locals>.<lambda>  r   r   r   r   s     r   link_readerz+CategorizedMarkdownCorpusReader.link_reader  r   r   c                 :    |                      | j        ||          S rF   )r   r  r   s      r   linksz%CategorizedMarkdownCorpusReader.links!      %%d&6LLLr   c                   
 | j                             |                                          }dt          fd|          }d
t          
fd|          }t	                      }t          ||          D ]P\  }}|                    |          }|                    ||          }	|                    |||	dz                       Qd |D             S )N)bullet_list_openordered_list_openc                 ,    | j         dk    o| j        v S r,   r   )r   opening_typess    r   r   z=CategorizedMarkdownCorpusReader.list_reader.<locals>.<lambda>)      agl>qv'> r   )bullet_list_closeordered_list_closec                 ,    | j         dk    o| j        v S r,   r   )r   closing_typess    r   r   z=CategorizedMarkdownCorpusReader.list_reader.<locals>.<lambda>-  r
  r   r   c                 `    g | ]+}t          |d          j        dk    d |D                       ,S )r   r  c                 *    g | ]}|j         	|j         S r   rG   r   s     r   rP   zJCategorizedMarkdownCorpusReader.list_reader.<locals>.<listcomp>.<listcomp>7  s!    888qai8888r   )rv   r   )r   r   s     r   rP   z?CategorizedMarkdownCorpusReader.list_reader.<locals>.<listcomp>4  sT     
 
 

 	 q	"5588F888 
 
 
r   r   )r8   r.   r   r   r   list_blocksr   r   r   r   r  r	  s             @@r   list_readerz+CategorizedMarkdownCorpusReader.list_reader%  s    ""6;;==11A>>>>
 
 D>>>>
 
 ff77 	J 	JDAq"LLOOM"LLM::Mvmma6G&GHIIII
 

 &
 
 
 	
r   c                 :    |                      | j        ||          S rF   )r   r  r   s      r   listsz%CategorizedMarkdownCorpusReader.lists<  r  r   c                 b    t                      t                      }}d} j                            |                                          D ]V}|j        dk    r2|j        dk    r'|r|                    |           t                      }d}|r|                    |           W fd|D             S )NFr   heading_openTc                     g | ]f}t          |d          j        |d         j                            d          j        j                            |j        j        d                    gS )r   r   #Nr   )rp   r6   markupcountr/   r   r-   r   r   s     r   rP   zBCategorizedMarkdownCorpusReader.section_reader.<locals>.<listcomp>K  sx     
 
 
  a a%%c**$++E4;3FD+QQ 
 
 
r   )r   r/   r   r   rs   r   r   )r8   r.   section_blocksr0   
in_headingr   s   `     r   section_readerz.CategorizedMarkdownCorpusReader.section_reader@  s     $
""6;;==11 	  	 Aw!||. 8 8 1"))%000!
  Q
 
 
 
 (
 
 
 	
r   c                 :    |                      | j        ||          S rF   )r   r  r   s      r   sectionsz(CategorizedMarkdownCorpusReader.sectionsT  s    %%d&97JOOOr   rF   )NN)r=   rZ   r[   __doc__r9   r*   r   r   rH   rJ   rQ   rX   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  rm   rn   s   @r   r   r      s         )/ H H H H H. !+ + + + + ! + !+ + + + + ! + !? ? ? ? ? ! ? !A A A A A ! A !A A A A A ! A !A A A A A ! A

 
 

 
 
 !Q Q Q ! Q
 
 
( !S S S ! S
 
 
 !S S S ! S
 
 
 !N N N ! N
 
 
 !M M M ! M
 
 
. !M M M ! M
 
 
( !P P P ! P P P P Pr   r   N)collectionsr   	functoolsr   r   nltk.corpus.reader.apir   nltk.corpus.reader.plaintextr   nltk.corpus.reader.utilr   r	   nltk.tokenizer
   r   r   r*   r1   r3   r^   rp   rt   ru   rv   rx   r   r   r   r   <module>r'     s   " " " " " " $ $ $ $ $ $ $ $ : : : : : : > > > > > > @ @ @ @ @ @ @ @ J J J J J J J J J J  2  
 
 
 
 
 
 
 
D
 
 
 
 
 
 
 
*         m       	
7/00z&.//z&-..    0   0QP QP QP QP QP&=?S QP QP QP QP QPr   