import spacy
INFO:tensorflow:Enabling eager execution INFO:tensorflow:Enabling v2 tensorshape INFO:tensorflow:Enabling resource variables INFO:tensorflow:Enabling tensor equality INFO:tensorflow:Enabling control flow v2
# Load the small English pipeline and show what the statistical NER
# predicts for the sample sentence out of the box.
nlp = spacy.load("en_core_web_sm")
text = "West Chestertenfieldville was referenced in Mr. Deeds."
doc = nlp(text)
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")
West Chestertenfieldville PERSON Deeds PERSON
# Append an EntityRuler at the END of the pipeline. The statistical NER
# runs first and has already claimed these tokens, so this pattern is
# expected to have no visible effect on doc.ents.
ruler = nlp.add_pipe("entity_ruler")
ruler.add_patterns([
    {"label": "GPE", "pattern": "West Chestertenfieldville"},
])
doc = nlp(text)
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")
West Chestertenfieldville PERSON Deeds PERSON
# Inspect component order and which doc/token attributes each pipe assigns;
# shows entity_ruler currently sits after ner.
nlp.analyze_pipes()
{'summary': {'tok2vec': {'assigns': ['doc.tensor'], 'requires': [], 'scores': [], 'retokenizes': False}, 'tagger': {'assigns': ['token.tag'], 'requires': [], 'scores': ['tag_acc'], 'retokenizes': False}, 'parser': {'assigns': ['token.dep', 'token.head', 'token.is_sent_start', 'doc.sents'], 'requires': [], 'scores': ['dep_uas', 'dep_las', 'dep_las_per_type', 'sents_p', 'sents_r', 'sents_f'], 'retokenizes': False}, 'attribute_ruler': {'assigns': [], 'requires': [], 'scores': [], 'retokenizes': False}, 'lemmatizer': {'assigns': ['token.lemma'], 'requires': [], 'scores': ['lemma_acc'], 'retokenizes': False}, 'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'], 'requires': [], 'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'], 'retokenizes': False}, 'entity_ruler': {'assigns': ['doc.ents', 'token.ent_type', 'token.ent_iob'], 'requires': [], 'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'], 'retokenizes': False}}, 'problems': {'tok2vec': [], 'tagger': [], 'parser': [], 'attribute_ruler': [], 'lemmatizer': [], 'ner': [], 'entity_ruler': []}, 'attrs': {'doc.tensor': {'assigns': ['tok2vec'], 'requires': []}, 'token.tag': {'assigns': ['tagger'], 'requires': []}, 'token.head': {'assigns': ['parser'], 'requires': []}, 'token.dep': {'assigns': ['parser'], 'requires': []}, 'doc.sents': {'assigns': ['parser'], 'requires': []}, 'token.ent_iob': {'assigns': ['ner', 'entity_ruler'], 'requires': []}, 'token.is_sent_start': {'assigns': ['parser'], 'requires': []}, 'token.lemma': {'assigns': ['lemmatizer'], 'requires': []}, 'doc.ents': {'assigns': ['ner', 'entity_ruler'], 'requires': []}, 'token.ent_type': {'assigns': ['ner', 'entity_ruler'], 'requires': []}}}
# Rebuild the pipeline, this time inserting the EntityRuler BEFORE "ner"
# so its pattern matches take precedence over the model's predictions.
nlp = spacy.load("en_core_web_sm")
ruler = nlp.add_pipe("entity_ruler", before="ner")
ruler.add_patterns([
    {"label": "GPE", "pattern": "West Chestertenfieldville"},
])
nlp.analyze_pipes()
{'summary': {'tok2vec': {'assigns': ['doc.tensor'], 'requires': [], 'scores': [], 'retokenizes': False}, 'tagger': {'assigns': ['token.tag'], 'requires': [], 'scores': ['tag_acc'], 'retokenizes': False}, 'parser': {'assigns': ['token.dep', 'token.head', 'token.is_sent_start', 'doc.sents'], 'requires': [], 'scores': ['dep_uas', 'dep_las', 'dep_las_per_type', 'sents_p', 'sents_r', 'sents_f'], 'retokenizes': False}, 'attribute_ruler': {'assigns': [], 'requires': [], 'scores': [], 'retokenizes': False}, 'lemmatizer': {'assigns': ['token.lemma'], 'requires': [], 'scores': ['lemma_acc'], 'retokenizes': False}, 'entity_ruler': {'assigns': ['doc.ents', 'token.ent_type', 'token.ent_iob'], 'requires': [], 'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'], 'retokenizes': False}, 'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'], 'requires': [], 'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'], 'retokenizes': False}}, 'problems': {'tok2vec': [], 'tagger': [], 'parser': [], 'attribute_ruler': [], 'lemmatizer': [], 'entity_ruler': [], 'ner': []}, 'attrs': {'doc.tensor': {'assigns': ['tok2vec'], 'requires': []}, 'token.tag': {'assigns': ['tagger'], 'requires': []}, 'token.head': {'assigns': ['parser'], 'requires': []}, 'token.dep': {'assigns': ['parser'], 'requires': []}, 'doc.sents': {'assigns': ['parser'], 'requires': []}, 'token.ent_iob': {'assigns': ['entity_ruler', 'ner'], 'requires': []}, 'token.is_sent_start': {'assigns': ['parser'], 'requires': []}, 'token.lemma': {'assigns': ['lemmatizer'], 'requires': []}, 'doc.ents': {'assigns': ['entity_ruler', 'ner'], 'requires': []}, 'token.ent_type': {'assigns': ['entity_ruler', 'ner'], 'requires': []}}}
# Re-run the text: the ruler's GPE label now wins for the place name.
doc = nlp(text)
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")
West Chestertenfieldville GPE Deeds PERSON
# Fresh pipeline with two string patterns: a place name (GPE) and a
# custom FILM label for the movie title, both inserted ahead of ner.
nlp3 = spacy.load("en_core_web_sm")
ruler = nlp3.add_pipe("entity_ruler", before="ner")
film_patterns = [
    {"label": "GPE", "pattern": "West Chestertenfieldville"},
    {"label": "FILM", "pattern": "Mr. Deeds"},
]
ruler.add_patterns(film_patterns)
doc = nlp3(text)
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")
West Chestertenfieldville GPE Mr. Deeds FILM
# Token-level pattern on a blank pipeline: match US-style phone numbers
# of the shape "(ddd) ddd-dddd" (hyphen optional via OP "?").
text = "This is a sample number (555) 555-5555."
nlp = spacy.blank("en")
ruler = nlp.add_pipe("entity_ruler")
phone_tokens = [
    {"ORTH": "("},              # literal open paren
    {"SHAPE": "ddd"},           # three-digit area code
    {"ORTH": ")"},              # literal close paren
    {"SHAPE": "ddd"},           # three-digit exchange
    {"ORTH": "-", "OP": "?"},   # optional hyphen separator
    {"SHAPE": "dddd"},          # four-digit line number
]
ruler.add_patterns([{"label": "PHONE_NUMBER", "pattern": phone_tokens}])
doc = nlp(text)
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")
(555) 555-5555 PHONE_NUMBER