import re

from django.conf import settings

# NLP pipeline callable, read once from Django settings at import time.
# NOTE(review): the usage below (``doc.ents``, ``ent.label_``) looks like a
# spaCy ``Language`` object -- confirm against the settings module.
nlp = settings.MODEL

# Entity labels that extract_names() treats as a "name".
_ENTITY_LABELS = ('PERSON', 'ORG')

# A standalone "dr"/"Dr"/"DR" token plus any dots/whitespace that follow it.
# The word boundaries keep the rule from firing inside ordinary words and
# names such as "Andrew", "Sandra" or "Drew".
_DOCTOR_TITLE_RE = re.compile(r'\b[dD][rR]\b[.\s]*')


def clean_text(text: str) -> str:
    """Normalise raw text before it is handed to the NLP pipeline.

    The following rewrites are applied, in order:

    1. A colon and any whitespace after it become a single space.
    2. Every ampersand becomes a comma.
    3. A newline, together with any dots directly before or after it,
       becomes a single dot.
    4. A standalone "Dr" title (any letter case, with or without trailing
       dots/whitespace) is normalised to ``"Dr. "``.

    Args:
        text: The raw input string.

    Returns:
        The normalised string.
    """
    text = re.sub(r':\s*', ' ', text)
    text = re.sub(r'&', ',', text)
    text = re.sub(r'\.*\n\.*', '.', text)
    # Runs after the newline rule on purpose: "Dr\nSmith" has already become
    # "Dr.Smith" by now and is normalised to "Dr. Smith" here.
    text = _DOCTOR_TITLE_RE.sub('Dr. ', text)
    return text


def extract_names(text: str) -> list:
    """Return the unique PERSON and ORG entity texts found in *text*.

    The text is first normalised with :func:`clean_text` and then passed to
    the module-level ``nlp`` callable.

    Args:
        text: The raw input string.

    Returns:
        A list of distinct entity strings, ordered by first occurrence in
        ``doc.ents`` so that the result is stable from run to run.
    """
    doc = nlp(clean_text(text))
    # dict.fromkeys() de-duplicates like a set but keeps insertion order,
    # which a set does not (string hashing is randomised per process).
    unique_names = dict.fromkeys(
        ent.text for ent in doc.ents if ent.label_ in _ENTITY_LABELS
    )
    return list(unique_names)