%pip install matplotlib nltk
import glob
import os
from string import punctuation

import matplotlib.pyplot as plt
from nltk.tokenize import TreebankWordTokenizer
# Words marked on the dispersion plots. The position of a word in this list
# is the y-axis row it is drawn on, so reordering the list reorders the plot.
words = [
    'war', 'love', 'death', 'life',
    'marry', 'fight', 'king', 'queen',
]
def lexical_dispersion_plot_directory(directory):
    """Draw a lexical dispersion plot for every file matching a glob pattern.

    Args:
        directory: A glob pattern rather than a bare directory name, e.g.
            ``"books/custom-corpus/*.txt"``. The pattern is expanded with
            ``recursive=True``, so ``**`` descends into subdirectories.

    Returns:
        None. Each matching file is passed to ``lexical_dispersion_plot_file``,
        which prints a heading and shows one figure per file. A pattern that
        matches no files produces no output.
    """
    # Sorted so the plots come out in a reproducible order; glob itself makes
    # no ordering promise.
    for file_path in sorted(glob.glob(directory, recursive=True)):
        # A recursive "**" pattern also yields directories, which cannot be
        # opened as text files, so only regular files are plotted.
        if not os.path.isfile(file_path):
            continue
        # Delegate instead of repeating the tokenize-and-plot logic here, so
        # both entry points always behave the same way.
        lexical_dispersion_plot_file(file_path)
def lexical_dispersion_plot_file(file_path):
    """Draw a lexical dispersion plot of the tracked ``words`` for one file.

    The text is split with NLTK's ``TreebankWordTokenizer``. Each token is
    lower-cased and stripped of leading/trailing ASCII punctuation, and tokens
    left empty by that step are dropped. Every remaining token equal to an
    entry of the module-level ``words`` list is drawn as a red ``x`` at
    (token offset, index of the word in ``words``).

    Args:
        file_path: Path of the text file to read.

    Returns:
        None. A heading is printed and the figure is displayed with
        ``plt.show()``.
    """
    # NOTE(review): the file is decoded with the platform default encoding;
    # pass an explicit ``encoding=`` here if the corpus encoding is known.
    with open(file_path, "r") as file:
        text = file.read()

    # Normalize every token. Rebinding the loop variable in a plain ``for``
    # loop would leave the list untouched, so the normalized values are
    # collected into a new list; tokens that were pure punctuation become
    # empty strings and are discarded.
    normalized = (
        token.lower().strip(punctuation)
        for token in TreebankWordTokenizer().tokenize(text)
    )
    tokens = [token for token in normalized if token]

    # One (offset, row) point per occurrence of a tracked word.
    points = [
        (offset, row)
        for offset, token in enumerate(tokens)
        for row, word in enumerate(words)
        if token == word
    ]
    if points:
        x, y = zip(*points)
    else:
        # No tracked word occurs: still draw the (empty) axes.
        x = y = ()

    print(f"Lexical Dispersion Plot for {file_path}")
    plt.figure(figsize=(8, 6))
    plt.plot(x, y, "rx")
    plt.yticks(range(len(words)), words)
    # Pad the y-axis by one row on each side so the first and last words are
    # not drawn on the frame.
    plt.ylim(-1, len(words))
    plt.title(f"Lexical Dispersion Plot for {file_path}")
    plt.xlabel("Word Offset")
    plt.show()
# Plot the tracked words for a single file from the Shakespeare corpus folder.
lexical_dispersion_plot_file("books/Shakespeare-corpus/Ado Much Ado About Nothing.txt")
I commented out the following lines to keep the page tidy, since the corpora contain many text files.
# lexical_dispersion_plot_directory("books/Shakespeare-corpus/*.txt")
# lexical_dispersion_plot_directory("books/Marlowe-corpus/*.txt")
# Plot one file from the custom corpus on its own.
lexical_dispersion_plot_file("books/custom-corpus/1HVI-MIT (CL).txt")
# Then plot every .txt file matched in the custom corpus folder.
# NOTE(review): this pattern presumably also matches the file plotted on the
# previous line, so that file would be plotted twice - confirm this is intended.
lexical_dispersion_plot_directory("books/custom-corpus/*.txt")