When wanting to apply NER to concise concepts, it is really easy to come up with examples, but pretty difficult to train an entire pipeline. Concise Concepts uses few-shot NER based on word embedding similarity to get you going with easy! Now with entity scoring!
pip install concise-concepts
import spacy
from spacy import displacy
import concise_concepts
data = {
"fruit": ["apple", "pear", "orange"],
"vegetable": ["broccoli", "spinach", "tomato"],
"meat": ["beef", "pork", "fish", "lamb"]
}
text = """
Heat the oil in a large pan and add the Onion, celery and carrots.
Then, cook over a medium–low heat for 10 minutes, or until softened.
Add the courgette, garlic, red peppers and oregano and cook for 2–3 minutes.
Later, add some oranges and chickens. """
nlp = spacy.load("en_core_web_lg", disable=["ner"])
# ent_score for entity condifence scoring
nlp.add_pipe("concise_concepts", config={"data": data, "ent_score": True})
doc = nlp(text)
options = {"colors": {"fruit": "darkorange", "vegetable": "limegreen", "meat": "salmon"},
"ents": ["fruit", "vegetable", "meat"]}
ents = doc.ents
for ent in ents:
new_label = f"{ent.label_} ({float(ent._.ent_score):.0%})"
options["colors"][new_label] = options["colors"].get(ent.label_.lower(), None)
options["ents"].append(new_label)
ent.label_ = new_label
doc.ents = ents
displacy.render(doc, style="ent", options=options)
data = {
"fruit": ["apple", "pear", "orange"],
"vegetable": ["broccoli", "spinach", "tomato"],
"meat": ["beef", "pork", "fish", "lamb"]
}
topn = [50, 50, 150]
assert len(topn) == len
nlp.add_pipe("concise_concepts", config={"data": data, "topn": topn})
import spacy
import concise_concepts
data = {
"ORG": ["Google", "Apple", "Amazon"],
"GPE": ["Netherlands", "France", "China"],
}
text = """Sony was founded in Japan."""
nlp = spacy.load("en_core_web_lg")
nlp.add_pipe("concise_concepts", config={"data": data, "ent_score": True})
doc = nlp(text)
print([(ent.text, ent.label_, ent._.ent_score) for ent in doc.ents])
# output
#
# [('Sony', 'ORG', 0.63740385), ('Japan', 'GPE', 0.5896993)]
data = {
"fruit": ["apple", "pear", "orange"],
"vegetable": ["broccoli", "spinach", "tomato"],
"meat": ["beef", "pork", "fish", "lamb"]
}
# model from https://radimrehurek.com/gensim/downloader.html or path to local file
model_path = "glove-twitter-25"
nlp.add_pipe("concise_concepts", config={"data": data, "model_path": model_path})