#!/usr/bin/env python # coding: utf-8 #
Peter Norvig
January 2020
# # # Boggle # # ![Boggle Board](https://www.puzzle-words.com/art/og/puzzle-words.png) # # The word game **Boggle** ([rules here](https://howdoyouplayit.com/boggle-rules-play-boggle/)) is a fun game to play with friends, but not so fun to play against a computer. It is all too simple for a computer to find *all* the valid words in a Boggle board (such as the one shown above) in less than a second. We'll see how to do that as a straightforward programming exercise. Then we'll consider a more challenging problem: # # # Inverse Boggle # # The goal of Inverse Boggle is to find an arrangement of letters on a board that scores the most points. It is called Inverse Boggle because, instead of "give me a board and I'll find the words" it is "give me the word list and I'll find a board with lots of words." # # It is not feasible to try all possible boards ($26^{5 \times 5} \approx 10^{35}$ possible $5 \times 5$ boards), so we will use **hill-climbing**, a heuristic search technique that does not guarantee finding an optimal solution. # # # The Boggle Board # # I'll represent a board as a dict where the keys are `(x, y)` coordinates of squares, and the values are individual letters (except that `QU` occupies a single square). A board will also hold: # - `board.squares`: a list of squares in row-major order. # - `board.neighbors`: a dict of `{square: neighbors}`. # - `board.format`: a string format method to print the board nicely using `__repr__`. # # # In[1]: import random import itertools class Board(dict): """A board is a dict of {(x, y): L}.""" def __init__(self, letters=None): letters = [('QU' if L == 'Q' else L) for L in letters if 'A' <= L <= 'Z'] n = int(len(letters) ** 0.5) self.squares = [(x, y) for y in range(n) for x in range(n)] self.neighbors = {s: neighbors(s, n) for s in self.squares} self.format = ('\n'.join(['{:2s}' * n] * n)).format self.update(zip(self.squares, letters)) def __repr__(self): return self.format(*(self[s].capitalize() for s in self.squares)) def neighbors(square, n) -> list: """All the squares that neighbor a square.""" (x, y) = square return [(x+dx, y+dy) for dx in (-1, 0, 1) for dy in (-1, 0, 1) if 0 <= x+dx < n and 0 <= y+dy < n and not (dx == dy == 0)] # In[2]: board = Board('PUZZL WORDE BOGGL SEARC FINDH') board # # The word list and scoring # # We'll read in a word list, convert to uppercase, and keep all the words that are three letters or more. The variable `WORDS` holds the set of valid words. The function `total_score` computes the total score of a set of words. It does not take a board as an argument, and so it does not check if the words can actually be made on the board. It does check that the words are in the `WORDS` list of valid words. # In[3]: get_ipython().system(' [ -e enable1.txt ] || curl -O http://norvig.com/ngrams/enable1.txt') get_ipython().system(' wc -w enable1.txt') # In[4]: def total_score(found, scores=[0, 0, 0, 1, 1, 2, 3, 5] + [11] * 99) -> int: """The total score for the words found, according to the rules.""" return sum([scores[len(w)] for w in found & WORDS]) WORDS = {w for w in open('enable1.txt').read().upper().split() if len(w) >= 3} len(WORDS) # # Finding all the words in a board # # The strategy for finding words: # - A word could start at any of the squares on the board, so consider each one. # - At each square, form a **path** consisting of just that square (for example, the top left square forms the path `[(0, 0)]`) and a **prefix string** consisting of the letters visited in the path; for this one-square path on `board` (the one with `PUZZLE` in it), the prefix string would be `'P'`. On a 5 by 5 board we would have 25 initial paths of length one. # - Now continue each path: # - If the prefix string is a word, record it: add it to the set `found`. # - If the prefix string is a prefix of some word in the dictionary, continue the path by visiting each neighboring square that has not already been visited in the path. For example, in `board`, the path `[(0, 0)]` with prefix `'P'` would be continued to three neighbors: # - `[(0, 0), (1, 0)]`, `'PU'`: here `'PU'` is a prefix of a word so continue this path. # - `[(0, 0), (0, 1)]`, `'PW'`: here `'PW'` is not a prefix of any word so do **not** continue this path. # - `[(0, 0), (1, 1)]`, `'PO'`: here `'PO'` is a prefix of a word so continue this path. # - One continuation of `'PO'` is as follows: # - `[(0, 0), (1, 1), (0, 1)]`, `'POW'`: here `'POW'` is both a word and a prefix, so record it and continue. # - We can precompute the set of all `PREFIXES` of all words in the word list. # In[5]: def word_prefixes(words) -> set: "The set of all non-empty, non-full-word prefixes of each word in a word list." return {word[:i] for word in words for i in range(1, len(word))} PREFIXES = word_prefixes(WORDS) # Precompute this once. def find_words(board, words=WORDS, prefixes=PREFIXES): """Find all words in a Boggle board by recursively continuing paths that are prefixes of words.""" found = set() # Accumulate the words found in the set `found` def continue_path(path, prefix): if prefix in words: found.add(prefix) if prefix in prefixes: for s in board.neighbors[path[-1]]: if s not in path: continue_path(path + [s], prefix + board[s]) # Body of find_words: Start paths from each of the squares on the board for s in board.squares: continue_path([s], board[s]) return found # How many words can we find? And how long does it take? # In[6]: get_ipython().run_line_magic('time', 'len(find_words(board))') # Can we see the words and summarize them? # In[7]: import textwrap def report(board, verbose=True): found = find_words(board, WORDS, PREFIXES) if verbose: print(f'{len(found)} words found:') print(textwrap.fill(' '.join(sorted(found)), width=90)) print(f'Score of {total_score(found)} for {len(found)} words on board:\n{board}') report(board) # Let's test that the `Q` works, and that a 4x4 board works: # In[8]: report(Board('QEST ANSI XWEO YRSN')) # # Inverse Boggle # # I'll tackle the Inverse Boggle problem with a **hill-climbing** approach: # - Start with some board. # - Make a random change to some letter on the board. # - Evaluate the score of the new board; if it is the best score so far, keep it. # - If not, revert the change. # - Repeat for a set number of iterations # In[9]: def boggle_hill_climbing(board, words=WORDS, prefixes=PREFIXES, repeat=500): """Solve Inverse Boggle by hill-climbing: find a high-scoring board by starting with one and changing it.""" board = Board(board[s] for s in board.squares) # Copy board, so we don't mutate original best_score = total_score(find_words(board, words, prefixes)) for _ in range(repeat): s, old_letter = mutate_boggle(board) new_score = total_score(find_words(board, words, prefixes)) if new_score >= best_score: best_score = new_score else: board[s] = old_letter # Change back return board # The distribution of letters in the 16-cube version of the game LETTERS = 3 * 'AABCDEEEGHIILMNOOPRSTUY' + 'AADEFFIJKKLLNNQRSSTTUVVWWXZ' def mutate_boggle(board, letters=LETTERS): """Make a random change to one letter in the board""" s = random.choice(board.squares) old_letter = board[s] board[s] = random.choice(letters) return s, old_letter def random_board(n=5, letters=LETTERS) -> Board: """Return a random Boggle board.""" return Board(random.sample(letters, n * n)) # Let's generate a random board and see if we can improve it: # In[10]: board2 = random_board(5) report(board2) # In[11]: report(boggle_hill_climbing(board2), False) # Impressive! We got roughly a ten-fold improvement in score after 500 repetitions. # We can do the same with our original `board`: # In[12]: report(board) # In[13]: report(boggle_hill_climbing(board), False) # Again, roughly a ten-fold improvement. # # Now, let's start from a very high-scoring board, identified by Justin Boyan in [his Ph.D. thesis](https://www.ri.cmu.edu/publications/learning-evaluation-functions-for-global-optimization/), and see if we can improve it: # In[14]: boyan = Board('RSTCS DEIAE GNLRP EATES MSSID') report(boyan, False) # In[15]: report(boggle_hill_climbing(boyan), False) # Sadly, we were not able to make an improvement in 500 repetitions. But that certainly is no guarantee that `boyan` is the best possible board. Here are some things to try to find a better board; maybe you can implement some of them, or try some ideas of your own: # # - **Genetic algorithms**: We used **mutation** of a single board, but we could also consider **crossover** where we keep a pool of boards and take the first half of one board and combine it with the second half of another. # - **Swaps**: We changed one letter at a time. But maybe there is no change of one letter that will improve a board, but there is a change involving two squares, either swapping them or mutating both of them. # - **Incremental score calculation**: We modified just one square and then tried to find all the words from scratch. Would it be faster to keep track of which squares contributed to which words, and only re-do the calculations for the one changed square? This would probably involve infixes of words rather than prefixes. Perhaps by keeping track of what each square contributes, we can make a better choice of which square to mutate. # - **Random restarts**: When is it best to continue searching from the current board, versus starting over from a new board?