Package nltk_lite :: Package misc :: Module wordfinder
[hide private]
[frames] | no frames]

Source Code for Module nltk_lite.misc.wordfinder

  1  # Natural Language Toolkit: Word Finder 
  2  # 
  3  # Copyright (C) 2001-2007 University of Pennsylvania 
  4  # Author: Steven Bird <sb@csse.unimelb.edu.au> 
  5  # URL: <http://nltk.sf.net> 
  6  # For license information, see LICENSE.TXT 
  7   
  8  # Simplified from PHP version by Robert Klein <brathna@gmail.com> 
  9  # http://fswordfinder.sourceforge.net/ 
 10   
 11  import random 
 12  from string import strip, join 
 13   
 14  # reverse a word with probability 0.5 
15 -def revword(word):
16 if random.randint(1,2) == 1: 17 word = list(word) 18 word.reverse() 19 return ''.join(word) 20 return word
21 22 # try to insert word at position x,y; direction encoded in xf,yf
23 -def step(word, x, xf, y, yf, grid):
24 for i in range(len(word)): 25 if grid[xf(i)][yf(i)] != "" and grid[xf(i)][yf(i)] != word[i]: 26 return False 27 for i in range(len(word)): 28 grid[xf(i)][yf(i)] = word[i] 29 return True
30 31 # try to insert word at position x,y, in direction dir
32 -def check(word, dir, x, y, grid, rows, cols):
33 if dir==1: 34 if x-len(word)<0 or y-len(word)<0: 35 return False 36 return step(word, x, lambda i:x-i, y, lambda i:y-i, grid) 37 elif dir==2: 38 if x-len(word)<0: 39 return False 40 return step(word, x, lambda i:x-i, y, lambda i:y, grid) 41 elif dir==3: 42 if x-len(word)<0 or y+(len(word)-1)>=cols: 43 return False 44 return step(word, x, lambda i:x-i, y, lambda i:y+i, grid) 45 elif dir==4: 46 if y-len(word)<0: 47 return False 48 return step(word, x, lambda i:x, y, lambda i:y-i, grid)
49
50 -def wordfinder(words, rows=20, cols=20, attempts=50, alph='ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
51 """ 52 Attempt to arrange words into a letter-grid with the specified number of 53 rows and columns. Try each word in several positions and directions, until 54 it can be fitted into the grid, or the maximum number of allowable attempts 55 is exceeded. Returns a tuple consisting of the grid and the words that were 56 successfully placed. 57 58 @param words: the list of words to be put into the grid 59 @type words: C(list) 60 @param rows: the number of rows in the grid 61 @type rows: C(int) 62 @param cols: the number of columns in the grid 63 @type cols: C(int) 64 @param attempts: the number of times to attempt placing a word 65 @type attempts: C(int) 66 @param alph: the alpabet, to be used for filling blank cells 67 @type alph: C(list) 68 @rtype: C(tuple) 69 """ 70 71 # place longer words first 72 words.sort(cmp=lambda x,y:cmp(len(x),len(y)), reverse=True) 73 74 grid = [] # the letter grid 75 used = [] # the words we used 76 77 # initialize the grid 78 for i in range(rows): 79 grid.append([""] * cols) 80 81 # try to place each word 82 for word in words: 83 word = strip(word).upper() # normalize 84 save = word # keep a record of the word 85 word = revword(word) 86 for attempt in range(attempts): 87 r = random.randint(0, len(word)) 88 dir = random.choice([1,2,3,4]) 89 x = random.randint(0,rows) 90 y = random.randint(0,cols) 91 if dir==1: x+=r; y+=r 92 elif dir==2: x+=r 93 elif dir==3: x+=r; y-=r 94 elif dir==4: y+=r 95 if 0<=x<rows and 0<=y<cols: 96 if check(word, dir, x, y, grid, rows, cols): 97 # used.append((save, dir, x, y, word)) 98 used.append(save) 99 break 100 101 # Fill up the remaining spaces 102 for i in range(rows): 103 for j in range(cols): 104 if grid[i][j] == '': 105 grid[i][j] = random.choice(alph) 106 107 return grid, used
108
109 -def demo():
110 from nltk_lite.corpora import words 111 wordlist = list(words.raw()) 112 random.shuffle(wordlist) 113 wordlist = wordlist[:200] 114 wordlist = [w for w in wordlist if 3 <= len(w) <= 12] 115 grid, used = wordfinder(wordlist) 116 117 print "Word Finder\n" 118 for i in range(len(grid)): 119 for j in range(len(grid[i])): 120 print grid[i][j], 121 print 122 print 123 124 for i in range(len(used)): 125 print "%d:" % (i+1), used[i]
126 127 if __name__ == '__main__': 128 demo() 129