123456789101112131415161718192021 |
- import re
- from django.conf import settings
# Shared NLP pipeline, loaded once via Django settings. It is called as
# ``nlp(text)`` and the result is read through ``.ents`` / ``.label_``.
# NOTE(review): presumably a loaded spaCy model -- confirm in settings.
nlp = settings.MODEL
def clean_text(text):
    """Normalize raw text so entity recognition performs better.

    The following substitutions are applied in order:

    1. A colon and any whitespace after it become a single space.
    2. Every ``&`` becomes a comma.
    3. A newline, together with any periods directly around it, becomes
       a single period.
    4. A standalone ``dr`` token (any letter case), together with any
       periods or whitespace that follow it, becomes ``'Dr. '``.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    text = re.sub(r':\s*', ' ', text)
    # Literal replacement; no regex needed for a single character.
    text = text.replace('&', ',')
    text = re.sub(r'\.*\n\.*', '.', text)
    # The word boundaries keep the title rewrite from firing on "dr" inside
    # ordinary words such as "Andrew" or "children".
    text = re.sub(r'\b[dD][rR]\b[.\s]*', 'Dr. ', text)
    return text
def extract_names(text):
    """Return the unique person and organization names found in *text*.

    The text is normalized with ``clean_text`` and passed to the
    module-level ``nlp`` model. Entities labelled ``PERSON`` or ``ORG``
    are collected.

    Args:
        text: The raw string to scan.

    Returns:
        A list of distinct entity strings, in the order the model first
        yields them.
    """
    doc = nlp(clean_text(text))
    wanted_labels = {'PERSON', 'ORG'}
    # dict.fromkeys removes duplicates while keeping first-seen order, so
    # the result is stable across runs (a set would not be).
    return list(dict.fromkeys(
        ent.text for ent in doc.ents if ent.label_ in wanted_labels
    ))
|