
    zIfN                     t   d dl Z d dlmZ d dlZd dlmZmZmZmZm	Z	m
Z
mZmZ d dlmZ  ej        d          d             Z ej        d          d             Z ej        d          d	             Z ej        d          d
             Zej        d             Zej                            dddgdfddgd fdddg          d             Zd Zd Zd Zd Zej        d             Zej                            ddddgdfdddg          d             Zej        d             Zej                            dddgdfd d!d"g          d#             Zd$ Z ej        d%             Z!ej                            dddgdfddgd&fdd'dgdfdd'dgd&fg          d(             Z"ej        d)             Z#ej                            dddgd*fd+d,d-g          d.             Z$d/ Z%d0 Z&ej        d1             Z'ej                            dd2d3ddd'gd4fdd5d'gd6fdd7d'gd4fg          d8             Z(ej        d9             Z)ej                            dd:d3ddd'gd;fdd5d'gd<fdd7d'gd;fg          d=             Z*ej        d>             Z+ej                            dd2d3ddd'gd?fdd5d'gd@fdd7d'gd?fg          dA             Z,ej        dB             Z-ej                            dd2d3ddd'gdCfdd5d'gdDfdd7d'gdEfg          dF             Z. ej        d          dG             Z/ej                            dHdIdJdKdLdMdNdO ej0        dPej        1                    dQR          S          g          ej                            dTg dU ed           V          dW                         Z2dX Z3dY Z4dZ Z5d[ Z6d\ Z7d] Z8d^ Z9dS )_    N)
itemgetter)MLEAbsoluteDiscountingInterpolatedKneserNeyInterpolatedLaplaceLidstoneStupidBackoff
VocabularyWittenBellInterpolated)padded_everygramssession)scopec                  (    t          g dd          S )N)abcdz<s></s>   )
unk_cutoff)r
        Q/var/www/piapp/venv/lib/python3.11/site-packages/nltk/test/unit/lm/test_models.py
vocabularyr      s    >>>1MMMMr   c                      g dg dgS )N)r   r   r   r   )egr   r   r   r   r   r   r   r   training_datar       s       "@"@"@AAr   c                     d | D             S )Nc                 H    g | ]}t          t          d |                     S )   listr   .0sents     r   
<listcomp>z(bigram_training_data.<locals>.<listcomp>%   +    GGGD"1d++,,GGGr   r   r    s    r   bigram_training_datar,   #       GGGGGGr   c                     d | D             S )Nc                 H    g | ]}t          t          d |                     S )   r$   r&   s     r   r)   z)trigram_training_data.<locals>.<listcomp>*   r*   r   r   r+   s    r   trigram_training_datar1   (   r-   r   c                 R    t          d|           }|                    |           |S Nr#   r   r   fit)r   r,   models      r   mle_bigram_modelr8   -   s,    j)))E	II"###Lr   zword, context, expected_scorer   r   r   r   )r   Nr   )r   Ng$I$I?)yNg۶m۶m?c                 d    t          j        |                     ||          d          |k    sJ d S N-C6?pytestapproxscore)r8   wordcontextexpected_scores       r   test_mle_bigram_scoresrD   4   s8      =)//g>>EEWWWWWWr   c                 \    t          j        |                     ddg                    sJ d S )Nr   r   )mathisinflogscore)r8   s    r   'test_mle_bigram_logscore_for_zero_scorerI   G   s1    :&//cU;;<<<<<<<r   c                     g d}d}d}t          j        |                     |          d          |k    sJ t          j        |                     |          d          |k    sJ d S )N)r   r   )r   r   )r   <UNK>)rL   r   )r   r   r   r   g(\?g_vO@r<   r>   r?   entropy
perplexity)r8   trainedHrP   s       r   'test_mle_bigram_entropy_perplexity_seenrS   K   s}      G" 	AJ=)11'::DAAQFFFF=)44W==tDD
RRRRRRr   c                     g d}t          j        |                     |                    sJ t          j        |                     |                    sJ d S )N)rK   r   r   )r   r   rM   )rF   rG   rO   rP   )r8   	untraineds     r   )test_mle_bigram_entropy_perplexity_unseenrW   d   s]    EEEI:&..y99::::::&11)<<=======r   c                     d}d}g d}t          j        |                     |          d          |k    sJ t          j        |                     |          d          |k    sJ d S )Ng~jt@gs @)r   r   r   )-r   r[   )r   r<   rN   )r8   rR   rP   texts       r   +test_mle_bigram_entropy_perplexity_unigramsr_   l   sw     	AJHHHD=)11$77>>!CCCC=)44T::DAAZOOOOOOr   c                 R    t          d|          }|                    |            |S Nr0   orderr   r5   r1   r   r7   s      r   mle_trigram_modelre      s,    aJ///E	II#$$$Lr   )r   )r   r   r   )r   Ngqq?)r9   NUUUUUU?c                 d    t          j        |                     ||          d          |k    sJ d S r;   r=   )re   rA   rB   rC   s       r   test_mle_trigram_scoresrh      s8    $ =*00w??FF.XXXXXXr   c                 T    t          dd|          }|                    |            |S )N皙?r#   rb   r   r6   r,   r   r7   s      r   lidstone_bigram_modelrm      s.    Sj999E	II"###Lr   g88?)r   Ng"u)?)r   Ngк{?)r9   NgL?c                 d    t          j        |                     ||          d          |k    sJ d S r;   r=   )rm   rA   rB   rC   s       r   test_lidstone_bigram_scorero      sE    4 	+11$@@$GG	 	 	 	 	 	r   c                     g d}d}d}t          j        |                     |          d          |k    sJ t          j        |                     |          d          |k    sJ d S )NrK   rU   )r   rL   )rL   r   )r   r   )r   r   g=,Ԛ]@g鷯1@r<   rN   )rm   r^   rR   rP   s       r    test_lidstone_entropy_perplexityrr      s}      D$ 	AJ=.66t<<dCCqHHHH=.99$??FF*TTTTTTr   c                 T    t          dd|          }|                    |            |S )Nrj   r0   rb   rk   rd   s      r   lidstone_trigram_modelrt      s.    Sj999E	II#$$$Lr   gqq?r   c                 d    t          j        |                     ||          d          |k    sJ d S r;   r=   )rt   rA   rB   rC   s       r   test_lidstone_trigram_scorerv      sE     	,224AA4HH	 	 	 	 	 	r   c                 R    t          d|          }|                    |            |S r3   )r   r6   rl   s      r   laplace_bigram_modelrx      s,    A*---E	II"###Lr   gqq?)r   NgtE]t?)r   NgF]tE?)r9   NgF]tE?c                 d    t          j        |                     ||          d          |k    sJ d S r;   r=   )rx   rA   rB   rC   s       r   test_laplace_bigram_scorerz      s8    6 	*00w??FF.XXXXXXr   c                     g d}d}d}t          j        |                     |          d          |k    sJ t          j        |                     |          d          |k    sJ d S )Nrq   gQ	@gݓz!@r<   rN   )rx   r^   rR   rP   s       r   &test_laplace_bigram_entropy_perplexityr|     s}      D$ 	AJ=-55d;;TBBaGGGG=-88>>EESSSSSSr   c                      | j         dk    sJ d S )Nr   )gamma)rx   s    r   test_laplace_gammar   4  s    %******r   c                 R    t          d|          }|                    |            |S )Nr0   r4   )r   r6   rd   s      r   wittenbell_trigram_modelr   8  s,    "1<<<E	II#$$$Lr   )r   Ngqq?)r   Ng        grq?r   gqq?r   c                 d    t          j        |                     ||          d          |k    sJ d S r;   r=   )r   rA   rB   rC   s       r   test_wittenbell_trigram_scorer   ?  sF    D 	.44T7CCTJJ	 	 	 	 	 	r   c                 T    t          dd|          }|                    |            |S )Nr0   g      ?)rc   discountr   r   r6   rd   s      r   kneserney_trigram_modelr   q  s.    !DZPPPE	II#$$$Lr   )r   Ng$I$I?gm۶m?g$I$I?c                 d    t          j        |                     ||          d          |k    sJ d S r;   r=   )r   rA   rB   rC   s       r   test_kneserney_trigram_scorer   x  sF    P 	-33D'BBDII	 	 	 	 	 	r   c                 R    t          d|          }|                    |            |S ra   )r   r6   rd   s      r   "absolute_discounting_trigram_modelr     s,    +!
KKKE	II#$$$Lr   rf   g      ?c                 d    t          j        |                     ||          d          |k    sJ d S r;   r=   )r   rA   rB   rC   s       r   'test_absolute_discounting_trigram_scorer     sF    J 	8>>tWMMtTT	 	 	 	 	 	r   c                 R    t          d|          }|                    |            |S ra   )r	   r6   rd   s      r   stupid_backoff_trigram_modelr     s,    j999E	II#$$$Lr   g      ?      ?g?c                 d    t          j        |                     ||          d          |k    sJ d S r;   r=   )r   rA   rB   rC   s       r   !test_stupid_backoff_trigram_scorer     sE    2 	288wGGNN	 	 	 	 	 	r   c                 R    t          d|          }|                    |            |S )Nr#   rb   r   rl   s      r   kneserney_bigram_modelr      s,    !jAAAE	II"###Lr   model_fixturer8   re   rm   rx   r   r   r   r   z*Stupid Backoff is not a valid distribution)reason)marksrB   )	rZ   r[   rY   )r   rL   r]   )r   )r)w)idsc                     |                     |           t          fdj        D                       }t          j        |d          dk    sJ d S )Nc              3   D   K   | ]}                     |          V  d S N)r@   )r'   r   rB   r7   s     r   	<genexpr>z!test_sums_to_1.<locals>.<genexpr>   s1      JJU[[G44JJJJJJr   gHz>r   )getfixturevaluesumvocabr>   r?   )r   rB   requestscores_for_contextr7   s    `  @r   test_sums_to_1r     se    0 ##M22EJJJJJekJJJJJ=+T22c999999r   c                 >    |                      d          dk    sJ d S )Nr0   random_seedrL   generatere   s    r   test_generate_one_no_contextr   )  s*    %%!%44??????r   c                     |                      dg          dk    sJ |                      ddg          dk    sJ |                      ddg          dk    sJ d S )Nr   	text_seedr   r   r   r   r   s    r   'test_generate_one_from_limiting_contextr   -  sx    %%%66#====%%c
%;;sBBBB%%c
%;;sBBBBBBr   c                 @    |                      dd          dk    sJ d S )N)r   r   r#   r   r   r   r   r   s    r   %test_generate_one_from_varied_contextr   4  s-    %%!%LLPSSSSSSSr   c                     t          | j        t          d                    g}|                     |           |                     ddd          g dk    sJ d S )Nbdbdbd   )r   r      r   )r   r   r   r   r   r   r   )r   rc   r%   r6   r   )re   more_training_texts     r   test_generate_cycler   9  s|    +,=,CT(^^TTU,---%%a:1%MM R R R      r   c                 F    |                      ddd          g dk    sJ d S )Nr   )r   r   r0   r   )rL   r   r   r   rL   r   r   s    r   test_generate_with_text_seedr   J  sJ    %%a<Q%OO T T T      r   c                 j    |                      dd          |                      dd          k    sJ d S )N)aliensr0   r   r   r   r   s    r   test_generate_oov_text_seedr   T  sY    %%1 &  		#	#ja	#	H	HI I I I I Ir   c                     t          j        t                    5  |                     d           d d d            n# 1 swxY w Y   |                     d d          |                     d          k    sJ d S )Nr   r   r0   r   r   )r>   raises	TypeErrorr   r   s    r   test_generate_None_text_seedr   Z  s    	y	!	! 6 6""W"5556 6 6 6 6 6 6 6 6 6 6 6 6 6 6 %%A &  		#	#	#	2	23 3 3 3 3 3s   =AA):rF   operatorr   r>   nltk.lmr   r   r   r   r   r	   r
   r   nltk.lm.preprocessingr   fixturer   r    r,   r1   r8   markparametrizerD   rI   rS   rW   r_   re   rh   rm   ro   rr   rt   rv   rx   rz   r|   r   r   r   r   r   r   r   r   r   r   paramxfailr   r   r   r   r   r   r   r   r   r   r   <module>r      s          	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 4 3 3 3 3 3 i   N N ! N i   B B ! B i   H H ! H i   H H ! H    #	seQ	seQ 	 X X X= = =S S S2> > >P P P(    # 	 
seQ "Y Y# "Y    # 
seY 	  	  	 ' 0 1 0U U U2    # 
seY	seY	sCj)$	sCj)$       # 
seW 	 	 	) 2 3 2T T T2+ + +    # 	 	 	 
se34
 
sCjLM 
sCj9;3 < = <(    # 	 	 	 
se-/ 
sCjAB 
sCj24? # #H I# #H    # 	 	 	 
se79 
sCjKL 
sCj<>9   B C   B    # 	 	 		seU	sCj%  
sCj=*! * + * i     !  ", *+##C $  	
 	
 	
 $ RRR
1   
: : % .:@ @ @C C CT T T
  "  I I I3 3 3 3 3r   