# utils.py (1.7 KB)
  1. import re
  2. from decimal import Decimal
  3. from django.conf import settings
  4. from .models import Staff
  5. ner_model = getattr(settings, 'SPACY_NER_MODEL')
  6. def get_all_names(text):
  7. doc = ner_model(text)
  8. names = {ent.text for ent in doc.ents if ent.label_ in ['PERSON', 'ORG', 'PRODUCT']}
  9. return names
  10. def all_staffs(location):
  11. names = []
  12. staffs = Staff.objects.filter(location=location)
  13. for s in staffs:
  14. names.extend(s.get_nick_names)
  15. return set(names)
  16. def clean_text(text):
  17. # replace some letter in text for getting better performance
  18. text = re.sub(r':\s*', ' ', text)
  19. text = re.sub(r'&', ', ', text)
  20. text = re.sub(r'/', ', ', text)
  21. text = re.sub(r'\.*\n\.*', '.', text)
  22. text = re.sub(r'[dD][rR](\.|\s*)*', 'Dr. ', text)
  23. return text
  24. def extract_names(review):
  25. text = clean_text(review.comment)
  26. names = get_all_names(text)
  27. cleaned_names = []
  28. for name in names:
  29. for n in name.split():
  30. cleaned_names.append(n) if n.lower() in all_staffs(review.location) else None
  31. return cleaned_names
  32. def add_point_to_staff_profile(review):
  33. staffs = Staff.objects.filter(location=review.location)
  34. names = extract_names(review)
  35. point_unit = Decimal(1/len(names)) if not names == [] else 0
  36. for name in names:
  37. for staff in staffs:
  38. if name.lower() in staff.get_nick_names:
  39. staff.name_mentioned += 1
  40. staff.total_units += point_unit
  41. staff.save()
  42. def make_all_staffs_point_zero():
  43. staffs = Staff.objects.all()
  44. for s in staffs:
  45. s.total_units = 0
  46. s.name_mentioned = 0
  47. s.save()
  48. def extract_names_from_reviews(reviews):
  49. for review in reviews:
  50. add_point_to_staff_profile(review)