#!pip install --quiet spacy
#!conda install -c conda-forge spacy
!spacy download en_core_web_md
import spacy
from spacy import displacy
# Load the medium English pipeline once and reuse it for every document.
nlp = spacy.load("en_core_web_md")

# Short single-sentence example.
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# Longer multi-sentence example. The adjacent literals are concatenated by the
# parser into one string with no line breaks; each group is one sentence.
text = (
    "In ancient Rome, some neighbors live in three adjacent houses. "
    "In the center is the house of Senex, who lives there with wife Domina, "
    "son Hero, and several slaves, including head slave Hysterium and the "
    "musical's main character Pseudolus. "
    "A slave belonging to Hero, Pseudolus wishes to buy, win, or steal his "
    "freedom. "
    "One of the neighboring houses is owned by Marcus Lycus, who is a buyer "
    "and seller of beautiful women; the other belongs to the ancient "
    "Erronius, who is abroad searching for his long-lost children (stolen in "
    "infancy by pirates). "
    "One day, Senex and Domina go on a trip and leave Pseudolus in charge of "
    "Hero. "
    "Hero confides in Pseudolus that he is in love with the lovely Philia, "
    "one of the courtesans in the House of Lycus (albeit still a virgin)."
)
longer_doc = nlp(text)

# Materialise the sentence iterator into a list so it can be printed and reused.
sentence_spans = [*longer_doc.sents]
print(sentence_spans)
# Print the text of every token in `doc`, one per line.
for token in doc:
    print(token.text)

# Fixed-width table of per-token attributes: six 15-character columns followed
# by the two boolean flags.
print(f'{"text":15s}{"lemma":15s}{"pos":15s}{"tag_":15s}{"dep_":15s}{"shape_":15s}is_alpha, is_stop')
print("-" * 110)
for token in doc:
    print(
        f'{token.text:15s}{token.lemma_:15s}{token.pos_:15s}{token.tag_:15s}{token.dep_:15s}{token.shape_:15s}{token.is_alpha}, {token.is_stop}'
    )

# Print the `morph` attribute of each token.
for token in doc:
    print(token.morph)
# Render the dependency view twice: first for the short document, then for the
# list of sentence spans taken from the longer one.
for dep_target in (doc, sentence_spans):
    displacy.render(dep_target, style="dep")
# One row per entity found in `doc`: its text, its label, and the start/end
# character offsets reported by the span.
for ent in doc.ents:
    print(f'{ent.text:15s}{ent.label_:10}{ent.start_char:5d}{ent.end_char:5d}')

# Render the entity view for the short document, then for the longer one.
displacy.render(doc, style="ent")
displacy.render(longer_doc, style="ent")
# For each token print its text, the norm of its vector, whether it has a
# vector at all, and whether it is flagged as out-of-vocabulary.
for token in doc:
    print(f'{token.text:15s}{token.vector_norm:10f} {token.has_vector} {token.is_oov}')
doc1 = nlp("I like salty fries and hamburgers.")
doc2 = nlp("Fast food tastes very good.")

# Similarity between two whole documents.
doc_score = doc1.similarity(doc2)
print(doc1, "<->", doc2, ":", doc_score)

# Similarity between a span and a single token of the same document.
french_fries = doc1[2:4]  # two-token slice (tokens 2 and 3)
burgers = doc1[5]  # single token at index 5
span_token_score = french_fries.similarity(burgers)
print(french_fries, "<->", burgers, ":", span_token_score)