import spacy
INFO:tensorflow:Enabling eager execution INFO:tensorflow:Enabling v2 tensorshape INFO:tensorflow:Enabling resource variables INFO:tensorflow:Enabling tensor equality INFO:tensorflow:Enabling control flow v2
!python -m spacy download en_core_web_md
Collecting en-core-web-md==3.1.0 Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.1.0/en_core_web_md-3.1.0-py3-none-any.whl (45.4 MB) Requirement already satisfied: spacy<3.2.0,>=3.1.0 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from en-core-web-md==3.1.0) (3.1.1) Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (1.0.5) Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.7 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (3.0.8) Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (2.0.5) Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (1.7.4) Requirement already satisfied: packaging>=20.0 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (20.9) Requirement already satisfied: numpy>=1.15.0 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (1.20.2) Requirement already satisfied: catalogue<2.1.0,>=2.0.4 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (2.0.4) Requirement already satisfied: pathy>=0.3.5 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (0.4.0) Requirement already satisfied: blis<0.8.0,>=0.4.0 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from 
spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (0.7.4) Requirement already satisfied: setuptools in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (54.1.1) Requirement already satisfied: wasabi<1.1.0,>=0.8.1 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (0.8.2) Requirement already satisfied: thinc<8.1.0,>=8.0.8 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (8.0.8)
2021-09-14 14:04:48.662634: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll WARNING: You are using pip version 21.1.2; however, version 21.2.4 is available. You should consider upgrading via the 'C:\Users\wma22\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.
Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (4.59.0) Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (2.25.1) Requirement already satisfied: srsly<3.0.0,>=2.4.1 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (2.4.1) Requirement already satisfied: jinja2 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (2.11.3) Requirement already satisfied: typer<0.4.0,>=0.3.0 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (0.3.2) Requirement already satisfied: preshed<3.1.0,>=3.0.2 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (3.0.5) Requirement already satisfied: pyparsing>=2.0.2 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from packaging>=20.0->spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (2.4.7) Requirement already satisfied: smart-open<4.0.0,>=2.2.0 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from pathy>=0.3.5->spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (3.0.0) Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from requests<3.0.0,>=2.13.0->spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (1.26.3) Requirement already satisfied: chardet<5,>=3.0.2 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from requests<3.0.0,>=2.13.0->spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (4.0.0) Requirement already satisfied: idna<3,>=2.5 in 
c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from requests<3.0.0,>=2.13.0->spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (2.10) Requirement already satisfied: certifi>=2017.4.17 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from requests<3.0.0,>=2.13.0->spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (2020.12.5) Requirement already satisfied: click<7.2.0,>=7.1.1 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from typer<0.4.0,>=0.3.0->spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (7.1.2) Requirement already satisfied: MarkupSafe>=0.23 in c:\users\wma22\appdata\local\programs\python\python39\lib\site-packages (from jinja2->spacy<3.2.0,>=3.1.0->en-core-web-md==3.1.0) (1.1.1) [+] Download and installation successful You can now load the package via spacy.load('en_core_web_md')
# Load the medium English pipeline; later cells rely on its word vectors.
nlp = spacy.load("en_core_web_md")

# Read the sample article. The encoding is passed explicitly so decoding does
# not depend on the platform's locale default (assumes the file is UTF-8 --
# TODO confirm).
with open("data/wiki_us.txt", "r", encoding="utf-8") as f:
    text = f.read()

doc = nlp(text)

# Only the first sentence is needed, so pull a single item from doc.sents
# instead of building a list of every sentence in the document.
sentence1 = next(iter(doc.sents))
print(sentence1)
The United States of America (U.S.A. or USA), commonly known as the United States (U.S. or US) or America, is a country primarily located in North America.
import numpy as np
# Find the ten vocabulary entries whose vectors are closest to one query word.
your_word = "country"

# most_similar takes a batch of query vectors, so the single word vector is
# wrapped in a list before being converted to an array.
query_vectors = np.asarray([nlp.vocab.vectors[nlp.vocab.strings[your_word]]])
ms = nlp.vocab.vectors.most_similar(query_vectors, n=10)

# ms[0] holds the matched keys per query; row 0 belongs to our only query.
# Each key is mapped back to its text through the string store.
words = [nlp.vocab.strings[key] for key in ms[0][0]]

# NOTE(review): spaCy documents the third element as similarity scores, so
# the name "distances" may be misleading -- confirm before relying on it.
distances = ms[2]
print(words)
['country', 'COUNTRY', 'NATION', 'nation', 'COUNTIRES', 'nations', 'member-states', 'worLd', 'World', 'world']
# Compare two sentences that are both about food.
doc1 = nlp("I like salty fries and hamburgers.")
doc2 = nlp("Fast food tastes very good.")

# Compute the score first, then print both documents alongside it.
food_score = doc1.similarity(doc2)
print(doc1, "<->", doc2, food_score)
I like salty fries and hamburgers. <-> Fast food tastes very good. 0.7799485853415737
# Contrast case: compare the food sentence with one about a building.
doc3 = nlp("The Empire State Building is in New York.")

landmark_score = doc1.similarity(doc3)
print(doc1, "<->", doc3, landmark_score)
I like salty fries and hamburgers. <-> The Empire State Building is in New York. 0.5196037639243649
# Two sentences that differ only in the fruit being named.
doc4 = nlp("I enjoy oranges.")
doc5 = nlp("I enjoy apples.")

fruit_score = doc4.similarity(doc5)
print(doc4, "<->", doc5, fruit_score)
I enjoy oranges. <-> I enjoy apples. 0.9607558420297302
# Same sentence frame as doc4, with the fruit swapped for burgers.
doc6 = nlp("I enjoy burgers.")

burger_score = doc4.similarity(doc6)
print(doc4, "<->", doc6, burger_score)
I enjoy oranges. <-> I enjoy burgers. 0.8755329986251214
# Similarity also works below document level: compare a span with a token.
# In doc1, tokens 2-3 are "salty fries" and token 5 is "hamburgers".
french_fries = doc1[2:4]
burgers = doc1[5]

span_token_score = french_fries.similarity(burgers)
print(french_fries, "<->", burgers, span_token_score)
salty fries <-> hamburgers 0.7304624
# Rebind nlp to an empty English pipeline; this replaces the model loaded
# earlier under the same name.
nlp = spacy.blank(name="en")

# Add the sentencizer component. The call is left as a bare trailing
# expression so the notebook displays the returned component.
nlp.add_pipe(factory_name="sentencizer")
<spacy.pipeline.sentencizer.Sentencizer at 0x27049c89e80>
# Inspect the blank pipeline: the returned dict summarizes what each component
# assigns, requires and scores. Left as a bare expression so the notebook
# displays it.
nlp.analyze_pipes()
{'summary': {'sentencizer': {'assigns': ['token.is_sent_start', 'doc.sents'], 'requires': [], 'scores': ['sents_f', 'sents_p', 'sents_r'], 'retokenizes': False}}, 'problems': {'sentencizer': []}, 'attrs': {'doc.sents': {'assigns': ['sentencizer'], 'requires': []}, 'token.is_sent_start': {'assigns': ['sentencizer'], 'requires': []}}}
# Load the small English pipeline under a separate name so its components can
# be inspected alongside the blank pipeline held in nlp.
nlp2 = spacy.load(name="en_core_web_sm")

# Bare trailing expression: the notebook displays the analysis dict.
nlp2.analyze_pipes()
{'summary': {'tok2vec': {'assigns': ['doc.tensor'], 'requires': [], 'scores': [], 'retokenizes': False}, 'tagger': {'assigns': ['token.tag'], 'requires': [], 'scores': ['tag_acc'], 'retokenizes': False}, 'parser': {'assigns': ['token.dep', 'token.head', 'token.is_sent_start', 'doc.sents'], 'requires': [], 'scores': ['dep_uas', 'dep_las', 'dep_las_per_type', 'sents_p', 'sents_r', 'sents_f'], 'retokenizes': False}, 'attribute_ruler': {'assigns': [], 'requires': [], 'scores': [], 'retokenizes': False}, 'lemmatizer': {'assigns': ['token.lemma'], 'requires': [], 'scores': ['lemma_acc'], 'retokenizes': False}, 'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'], 'requires': [], 'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'], 'retokenizes': False}}, 'problems': {'tok2vec': [], 'tagger': [], 'parser': [], 'attribute_ruler': [], 'lemmatizer': [], 'ner': []}, 'attrs': {'token.ent_type': {'assigns': ['ner'], 'requires': []}, 'doc.ents': {'assigns': ['ner'], 'requires': []}, 'token.dep': {'assigns': ['parser'], 'requires': []}, 'token.head': {'assigns': ['parser'], 'requires': []}, 'token.tag': {'assigns': ['tagger'], 'requires': []}, 'token.lemma': {'assigns': ['lemmatizer'], 'requires': []}, 'token.ent_iob': {'assigns': ['ner'], 'requires': []}, 'doc.tensor': {'assigns': ['tok2vec'], 'requires': []}, 'doc.sents': {'assigns': ['parser'], 'requires': []}, 'token.is_sent_start': {'assigns': ['parser'], 'requires': []}}}