"""Credit staff members who are mentioned by name in review comments."""
import re
from decimal import Decimal

from django.conf import settings

from .models import Staff
from nlu_job.nlu_utils import model_inference

# NOTE: neither constant is used by the functions below (name extraction goes
# through ``model_inference``); both are kept because other modules may import
# them from here.
nlp = settings.MODEL
nlu_url = settings.NLU_SERVER_URI


def get_all_names(text):
    """Return the set of PERSON/ORG entity values found in *text*.

    The text is passed to ``model_inference``; the ``'entities'`` list of its
    result is filtered on each entity's ``'entity'`` label and the matching
    ``'value'`` fields are collected.
    """
    res = model_inference(text)
    # A missing or null 'entities' key is treated as "no entities found"
    # instead of failing on iteration over None.
    entities = res.get('entities') or []
    return {
        ent.get('value')
        for ent in entities
        if ent.get('entity') in ('PERSON', 'ORG')
    }


def all_staffs(location):
    """Return the set of nick names of every ``Staff`` at *location*."""
    names = set()
    for staff in Staff.objects.filter(location=location):
        # ``get_nick_names`` is read as an attribute, not called; it is
        # presumably a property yielding an iterable of strings -- confirm
        # against the Staff model.
        names.update(staff.get_nick_names)
    return names


def clean_text(text):
    """Normalise punctuation in *text* before it is sent for entity extraction.

    Colons (with trailing whitespace) become a space, ``&`` and ``/`` become
    ``', '``, a newline together with any dots around it becomes a single
    ``'.'``, and every standalone "dr" (any letter case, optionally followed
    by dots/whitespace) is rewritten as ``'Dr. '``.
    """
    text = re.sub(r':\s*', ' ', text)
    text = re.sub(r'&', ', ', text)
    text = re.sub(r'/', ', ', text)
    text = re.sub(r'\.*\n\.*', '.', text)
    # Word boundaries keep the title rewrite from firing inside ordinary
    # words and names ("Andrew", "children", "Drew"), which would otherwise
    # be split into "AnDr. ew" etc. and break name matching downstream.
    text = re.sub(r'\b[dD][rR]\b[.\s]*', 'Dr. ', text)
    return text


def extract_names(review):
    """Return the words in the review's comment that are staff nick names.

    The comment is cleaned, run through entity extraction, and every
    whitespace-separated token of every extracted entity is kept (original
    casing) when its lowercase form is among the nick names of the staff at
    ``review.location``. Tokens may repeat if they occur in several entities.

    Returns an empty list when the comment is empty/None or nothing matches.
    """
    if not review.comment:
        return []
    names = get_all_names(clean_text(review.comment))
    if not names:
        return []
    # Fetch the location's nick names once; doing this per token would issue
    # a database query for every word of every entity.
    staff_names = all_staffs(review.location)
    return [
        token
        for name in names
        if name  # skip entities that came back without a value
        for token in name.split()
        if token.lower() in staff_names
    ]


def add_point_to_staff_profile(review):
    """Distribute one point for *review* among the staff it mentions.

    Each matched name is worth ``1 / len(names)``. For every matched name,
    every staff member at the review's location whose nick names contain the
    lowercased name gets ``name_mentioned`` incremented by one and
    ``total_units`` increased by that share, and is saved.
    """
    names = extract_names(review)
    if not names:
        return
    # Divide in Decimal arithmetic: Decimal(1 / n) would first compute a
    # binary float and then capture its representation error exactly.
    point_unit = Decimal(1) / len(names)
    staffs = Staff.objects.filter(location=review.location)
    for name in names:
        lowered = name.lower()
        for staff in staffs:
            if lowered in staff.get_nick_names:
                staff.name_mentioned += 1
                staff.total_units += point_unit
                staff.save()


def make_all_staffs_point_zero():
    """Reset ``total_units`` and ``name_mentioned`` to 0 on every ``Staff``."""
    for staff in Staff.objects.all():
        staff.total_units = 0
        staff.name_mentioned = 0
        # Saved one instance at a time so the model's own save() still runs.
        staff.save()


def extract_names_from_reviews(reviews):
    """Apply :func:`add_point_to_staff_profile` to each review in *reviews*."""
    for review in reviews:
        add_point_to_staff_profile(review)