# All the IPython Notebooks in the Python Natural Language Processing lecture series by Dr. Milaan Parmar are available on GitHub.
# Load spaCy's small English pipeline and tokenize a sentence containing an
# email address and a URL — spaCy's tokenizer keeps each as one token.
import spacy

nlp = spacy.load('en_core_web_sm')

doc2 = nlp(u"I'm always here to help you all! Email:milaanparmar9@gmail.com or visit more at https://github.com/milaan9!")
for t in doc2:
    print(t)  # one token per line; "I'm" splits into "I" + "'m", "!" is its own token

# Output:
# I 'm always here to help you all ! Email:milaanparmar9@gmail.com or visit more at https://github.com/milaan9 !
# Units and currency: spaCy splits "5km" into "5" + "km" and "$10.30" into
# "$" + "10.30", but keeps the decimal number intact.
doc3 = nlp(u'A 5km NYC cab ride costs $10.30')
for t in doc3:
    print(t)

# Output:
# A 5 km NYC cab ride costs $ 10.30
# Abbreviations: "St." and "U.S." stay single tokens; the sentence-final
# period is still split off as its own token.
doc4 = nlp(u"Let's visit St. Louis in the U.S. next year.")
for t in doc4:
    print(t)

# Output:
# Let 's visit St. Louis in the U.S. next year .
# Import the toolkit and the Porter Stemmer.
# NOTE: the explicit import replaces the original `from nltk.stem.porter import *`
# star-import, which pollutes the namespace (PEP 8).
import nltk
from nltk.stem.porter import PorterStemmer

p_stemmer = PorterStemmer()

words = ['run','runner','running','ran','runs','easily','fairly']
for word in words:
    print(word + ' --> ' + p_stemmer.stem(word))

# Output:
# run --> run   runner --> runner   running --> run   ran --> ran
# runs --> run   easily --> easili   fairly --> fairli
# SnowballStemmer (a.k.a. Porter2) — a refinement of the Porter algorithm.
from nltk.stem.snowball import SnowballStemmer

# The Snowball Stemmer requires that you pass a language parameter.
s_stemmer = SnowballStemmer(language='english')

words = ['run','runner','running','ran','runs','easily','fairly']
# words = ['generous','generation','generously','generate']
for word in words:
    print(word + ' --> ' + s_stemmer.stem(word))

# Output (note 'fairly' --> 'fair' here, vs. 'fairli' with Porter):
# run --> run   runner --> runner   running --> run   ran --> ran
# runs --> run   easily --> easili   fairly --> fair
# ---- Some more practice: compare the two stemmers on one word ----
words = ['consolingly']

print('Porter Stemmer:')
for word in words:
    print(word + ' --> ' + p_stemmer.stem(word))

# Output:
# Porter Stemmer:
# consolingly --> consolingli
# Snowball (Porter2) produces a shorter stem for the same word.
print('Porter2 Stemmer:')
for word in words:
    print(word + ' --> ' + s_stemmer.stem(word))

# Output:
# Porter2 Stemmer:
# consolingly --> consol
# Stemming has its drawbacks. If given the token saw, stemming might always
# return saw, whereas lemmatization would likely return either see or saw
# depending on whether the token was used as a verb or a noun. For example,
# both uses of "meeting" below are stemmed to "meet", losing the noun sense:
phrase = 'I am meeting him tomorrow at the meeting'
for word in phrase.split():
    print(word + ' --> ' + p_stemmer.stem(word))

# Output:
# I --> i   am --> am   meeting --> meet   him --> him
# tomorrow --> tomorrow   at --> at   the --> the   meeting --> meet
# Perform standard imports and print, for each token: its text, coarse POS
# tag, lemma hash (int), and lemma string.
import spacy

nlp = spacy.load('en_core_web_sm')

var1 = nlp(u"John Adam is one the researcher who invent the direction of way towards success!")
for token in var1:
    print(token.text, '\t', token.pos_, '\t', token.lemma, '\t', token.lemma_)

# Output (abridged):
# John PROPN 11174346320140919546 John
# is   AUX   10382539506755952630 be
# invent VERB 5373681334090504585 invent
# ...
def show_lemmas(text):
    """Print one aligned row per token: text, coarse POS tag, lemma hash, lemma string.

    Each token is expected to expose ``text``, ``pos_``, ``lemma`` and
    ``lemma_`` attributes (as spaCy tokens do). Columns are left-aligned
    to widths 12, 6 and 22 for readability.
    """
    for tok in text:
        row = f'{tok.text:<12} {tok.pos_:<6} {tok.lemma:<22} {tok.lemma_}'
        print(row)
# Same sentence as var1, now rendered through the aligned helper.
var2 = nlp(u"John Adam is one the researcher who invent the direction of way towards success!")
show_lemmas(var2)

# Output (abridged):
# John         PROPN  11174346320140919546   John
# is           AUX    10382539506755952630   be
# invent       VERB   5373681334090504585    invent
# ...
# Lemmatization is context-aware: the verb "meeting" lemmatizes to "meet",
# while the noun "meeting" keeps its surface form.
var3 = nlp(u"I am meeting him tomorrow at the meeting.")
show_lemmas(var3)

# Output (abridged):
# meeting      VERB   6880656908171229526    meet
# meeting      NOUN   14798207169164081740   meeting
# ...
# A final example: "'s" lemmatizes to "be"; the misspelling "greate" is
# tagged ADJ but gets no dictionary lemma.
var4 = nlp(u"That's of the greate person in the world")
show_lemmas(var4)

# Output (abridged):
# That         PRON   4380130941430378203    that
# 's           AUX    10382539506755952630   be
# greate       ADJ    4429768169814447593    greate
# ...