
    [f                         d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlmZ  G d d          Z	 G d d	          Z
d
 Zedk    r e             dS dS )    N)Counter)brown)words)FreqDistc                   @    e Zd ZddZd Zd Zd Zd Zd Zd	 Z	d
 Z
dS )Words      c                     || _         || _        |                                  t                      | _        |                                  d S )N)	min_limit	max_limit_initialize_dataTrieNode	trie_root_populate_trie)selfr   r   s      /var/www/piapp/lib/words.py__init__zWords.__init__   sI    "" 	 "    c                    t           j        j                            d           t           j                            d           t           j                            d           t          t          j                              | _        dS )zJCheck and download necessary NLTK corpora, and load frequency distributionz/var/www/piapp/nltk_datazcorpora/wordszcorpora/brownN)	nltkdatapathappendfindr   r   r   	freq_distr   s    r   r   zWords._initialize_data   s^     		8999	'''	'''!%+--00r   c                     t          t          j                              D ])}|                     |                                           *dS )z*Populate trie with English words from NLTKN)setr   _insert_to_trielower)r   words     r   r   zWords._populate_trie!   sH    && 	/ 	/D  ....	/ 	/r   c                     | j         }|D ].}||j        vrt                      |j        |<   |j        |         }/d|_        dS )zInsert a word into the trieTN)r   childrenr   is_end_of_wordr   r"   currentletters       r   r    zWords._insert_to_trie&   sT    . 	/ 	/FW---+3:: (&v.GG!%r   c                 Z    |                      |          }|                     |          }|S )z@Find and sort valid words from permutations of the source string)_find_valid_wordssort_words_by_frequency)r   source_stringvalid_wordssorted_word_freq_pairss       r   find_permutationszWords.find_permutations/   s/    ,,];;!%!=!=k!J!J%%r   c           	         t                      }t          |                                          }t          | j        t          | j        t          |                    dz             D ]}}t          j	        |
                                |          D ]S}d                    |                                          }|                     |          r|                    |           T~|S )zDGenerate valid words from permutations of input string within limits    )r   r   r!   ranger   minr   len	itertoolspermutationselementsjoin_find_in_trieadd)r   input_stringfound_wordschar_counterlengthpermr"   s          r   r*   zWords._find_valid_words5   s    ee|113344DNCL@Q@Q,R,RUV,VWW 	* 	*F!.|/D/D/F/FOO * *wwt}}**,,%%d++ *OOD)))* r   c                 Z    | j         }|D ]}||j        vr dS |j        |         }|j        S )zCheck if a word is in the trieF)r   r$   r%   r&   s       r   r:   zWords._find_in_trie@   sE    . 	/ 	/FW---uu&v.GG%%r   c                 F      fd|D             }t          |d d          S )z+Sort words by frequency in descending orderc                 .    g | ]}|j         |         fS  )r   ).0r"   r   s     r   
<listcomp>z1Words.sort_words_by_frequency.<locals>.<listcomp>K   s%    III$$t,-IIIr   c                     | d         S )Nr1   rD   )pairs    r   <lambda>z/Words.sort_words_by_frequency.<locals>.<lambda>L   s
    d1g r   T)keyreverse)sorted)r   unsorted_wordspairss   `  r   r+   zWords.sort_words_by_frequencyI   s5    IIII.IIIe!5!5tDDDDr   N)r	   r
   )__name__
__module____qualname__r   r   r   r    r/   r*   r:   r+   rD   r   r   r   r   
   s        
 
 
 
1 1 1/ / /
& & && & &	 	 	& & &E E E E Er   r   c                       e Zd ZdZd ZdS )r   zNode for Trie data structurec                 "    i | _         d| _        d S )NF)r$   r%   r   s    r   r   zTrieNode.__init__R   s    #r   N)rO   rP   rQ   __doc__r   rD   r   r   r   r   O   s)        &&$ $ $ $ $r   r   c                  p    t          dd          } d}t          |                     |                     d S )Nr	      )r   r   example)r   printr/   )words_instancer,   s     r   mainrZ   W   s;    Q!444NM	.
*
*=
9
9:::::r   __main__)r6   collectionsr   r   nltk.corpusr   r   nltk.probabilityr   r   r   rZ   rO   rD   r   r   <module>r_      s                           % % % % % %BE BE BE BE BE BE BE BEJ$ $ $ $ $ $ $ $; ; ; zDFFFFF r   