import re

from django.conf import settings

from .models import Staff

# NLP pipeline taken from the Django settings. It is called with a string and
# the result must expose ``.ents`` whose items have ``.text`` and ``.label_``
# (presumably a loaded spaCy model -- TODO confirm against settings.MODEL).
nlp = settings.MODEL

# Lower-cased tokens that are discarded from recognised entities by
# ``cleaning_name`` (compared against ``token.lower()``).
STOP_WORDS = [
    'signature', 'care', 'emergency', 'er', 'center', 'nurse', 'dr', 'dr.',
    'signaturecare', 'tech', 'doc', 'urgent', 'the', 'nures', 'nurses',
    'registration',
]

# Entity labels that ``extract_names`` keeps from the NLP result.
_ENTITY_LABELS = ('PERSON', 'ORG')

# A stand-alone "dr" in any letter case plus any trailing dots/whitespace.
# The word boundaries keep the pattern from firing inside ordinary words such
# as "address" or "Andrew".
_DR_TITLE_RE = re.compile(r'\b[dD][rR]\b[.\s]*')


def clean_text(text):
    """Normalise punctuation in *text* before it is passed to the NLP model.

    Steps, applied in this order:

    * a colon and any whitespace after it become a single space;
    * ``&`` and ``/`` become ``', '``;
    * a newline, together with any dots directly around it, becomes ``'.'``;
    * a stand-alone "dr" title (any case, optional dots/spaces after it) is
      rewritten as ``'Dr. '``.

    Args:
        text: The raw string to normalise.

    Returns:
        The normalised string.
    """
    text = re.sub(r':\s*', ' ', text)
    text = re.sub(r'&', ', ', text)
    text = re.sub(r'/', ', ', text)
    text = re.sub(r'\.*\n\.*', '.', text)
    text = _DR_TITLE_RE.sub('Dr. ', text)
    return text


def cleaning_name(names):
    """Split entity strings into tokens and drop short or stop-word tokens.

    Args:
        names: Iterable of strings; each one is split on whitespace.

    Returns:
        A list of the tokens that are longer than two characters and whose
        lower-cased form is not in ``STOP_WORDS``, in input order.
    """
    return [
        token
        for name in names
        for token in name.split()
        if len(token) > 2 and token.lower() not in STOP_WORDS
    ]


def extract_names(text):
    """Return the name tokens the NLP model recognises in *text*.

    The text is normalised with ``clean_text``, run through ``nlp``, and the
    text of every entity labelled ``PERSON`` or ``ORG`` is tokenised and
    filtered by ``cleaning_name``.

    Args:
        text: Free-form text; ``None`` or an empty string yields ``[]``.

    Returns:
        A list of name tokens. Duplicate entity strings are collapsed and the
        order of first appearance is preserved.
    """
    if not text:
        # Nothing to analyse; also avoids a TypeError in re.sub on None.
        return []
    doc = nlp(clean_text(text))
    # dict.fromkeys de-duplicates like a set but keeps a deterministic order.
    entities = dict.fromkeys(
        ent.text for ent in doc.ents if ent.label_ in _ENTITY_LABELS
    )
    return cleaning_name(entities)


def add_point_to_staff_profile(review):
    """Compute the per-name share of one point for the names in a review.

    Args:
        review: Object with a ``comment`` attribute holding the review text.

    Returns:
        None. Storing the points is not implemented yet (see TODO).
    """
    names = extract_names(review.comment)
    if not names:
        # No names found: nothing to credit, and 1 / 0 would raise.
        return
    staffs = Staff.objects.all()
    # One point is split evenly across all extracted name tokens.
    point_unit = 1 / len(names)
    # TODO: store points to the staff profile