# Download "Romeo and Juliet", save it locally, and draw a first word cloud.
#
# Notebook-only setup. IPython shell escapes are not valid in a plain .py
# file, so they are kept here as comments; run them once in a notebook cell
# (or in a terminal, without the leading "!") before executing this script.
# !pip install wordcloud
# !pip install --user requests

import matplotlib.pyplot as plt  # library to plot the wordcloud
import requests  # to download files through url
from wordcloud import STOPWORDS, WordCloud  # library to form the wordcloud

# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(
    'https://automatetheboringstuff.com/files/rj.txt',
    timeout=30,
)

# Check for a failed download *before* touching the body.
# raise_for_status() raises requests.HTTPError when the response carries a
# 4xx/5xx status and does nothing otherwise.  (An equivalent manual check is
# res.status_code == requests.codes.ok.)
res.raise_for_status()

# Quick inspection of the response; printed explicitly so the values are
# visible outside a notebook as well.
print(type(res))
print(len(res.text))  # 178,000+ characters long

# Slicing experiments on the downloaded text.
print(res.text[:0])
print(res.text[:1])
print(res.text[0])
print(res.text[:2])
print(res.text[:3])
print(res.text[:100])
print(res.text[:500])

# Write binary data instead of text data so the bytes received from the
# server are stored unchanged (this preserves the original encoding of the
# text).  The `with` block closes the file even if a write fails.
# https://www.geeksforgeeks.org/response-iter_content-python-requests/
with open('RomeoAndJuliet.txt', 'wb') as playFile:
    for chunk in res.iter_content(200000):
        playFile.write(chunk)

# Read the saved play back as text.  An explicit encoding makes the result
# independent of the platform default; undecodable bytes are replaced rather
# than aborting the script.
with open("RomeoAndJuliet.txt", 'r', encoding="utf-8", errors="replace") as fh:
    filedata = fh.read()

print(type(filedata))
print(len(filedata))
print('---------------')
print(filedata[:500])

# Start from the stop-word list that ships with the wordcloud package.
stopwords = set(STOPWORDS)

# Generate the wordcloud data, limited to 100 words.
wordcloud = WordCloud(stopwords=stopwords, max_words=100).generate(filedata)

# Plot the wordcloud.
plt.figure(figsize=(10, 10))
plt.imshow(wordcloud)
plt.axis("off")  # remove the axis values
plt.show()

# Add more words to ignore.
stopwords.update([
    "many", "go", "want", "value", "will", "come", "give",
    "Nurse", "one", "now", "yet", "let",
])

# Redo stop words.
# Limit number of words
#
# Redraw the word cloud on a white background, then draw it again inside a
# circular mask.  This section relies on `WordCloud`, `stopwords`, `filedata`
# and `plt` being defined earlier in the script.

import os

import numpy as np
from PIL import Image

wordcloud = WordCloud(
    stopwords=stopwords,
    max_words=100,
    background_color="white",
).generate(filedata)

# Plot the wordcloud.
plt.figure(figsize=(10, 10))
plt.imshow(wordcloud)
plt.axis("off")  # remove the axis values
plt.show()

# Notebook-only setup (not valid in a plain .py file); run once in a notebook
# cell, or in a terminal without the leading "!".
# !pip install pillow

# Plain-Python equivalent of the notebook's `pwd`: show the working
# directory, which is where a mask image would be looked up.
print(os.getcwd())

# Here we are going to use a circle image as mask.  An image file could be
# loaded instead:
# char_mask = np.array(Image.open(""))

# Build the circle with numpy: a 300x300 grid in which every pixel farther
# than 130 px from the centre (150, 150) is set to 255 and every pixel inside
# the circle is 0.  Per the wordcloud documentation, white (255) mask entries
# are treated as masked out, so words are drawn only inside the circle.
x, y = np.ogrid[:300, :300]
mask = (x - 150) ** 2 + (y - 150) ** 2 > 130 ** 2
mask = 255 * mask.astype(int)

# Generate the wordcloud inside the mask.
wordcloud = WordCloud(
    background_color="black",
    contour_width=0.5,
    contour_color="yellow",
    mask=mask,
).generate(filedata)

# Plot the wordcloud.
plt.figure(figsize=(8, 8))
plt.imshow(wordcloud)
plt.axis("off")  # remove the axis values
plt.show()