|
@@ -1,21 +1,41 @@
|
|
|
import re
|
|
|
from django.conf import settings
|
|
|
-
|
|
|
+from .models import Staff
|
|
|
|
|
|
# NLP pipeline object read from the Django setting ``MODEL``. It is called on
# cleaned text in ``extract_names`` and must return an object exposing ``.ents``
# whose items have ``.text`` and ``.label_`` (presumably a spaCy pipeline -
# TODO confirm against the settings module).
nlp = settings.MODEL
|
|
|
# Tokens that must never be reported as a staff name. ``cleaning_name``
# compares the lower-cased token against this list, so every entry is
# lower case.
STOP_WORDS = [
    'signature',
    'care',
    'emergency',
    'er',
    'center',
    'nurse',
    'dr',
    'dr.',
    'signaturecare',
    'tech',
    'doc',
    'urgent',
    'the',
    'nures',
    'nurses',
    'registration',
]
|
|
|
|
|
|
|
|
|
def clean_text(text):
    """Normalise review text before it is passed to the NER model.

    The following substitutions are applied, in order:

    1. A colon and any whitespace after it become a single space.
    2. ``&`` and ``/`` become ``', '`` so that joined names are separated.
    3. A newline (with any dots directly around it) becomes a single ``.``.
    4. The standalone title ``dr`` (any case, optionally followed by dots
       and/or whitespace) becomes ``'Dr. '``.

    Args:
        text: Raw review text (``str``).

    Returns:
        The normalised text.
    """
    text = re.sub(r':\s*', ' ', text)
    text = text.replace('&', ', ')
    text = text.replace('/', ', ')
    text = re.sub(r'\.*\n\.*', '.', text)
    # The word boundaries keep the title rule from firing inside ordinary
    # words such as "address", "children" or "drive", which the unanchored
    # pattern used to rewrite as "adDr. ess" etc. A title glued directly to
    # a name with no separator ("DrSmith") is no longer split.
    text = re.sub(r'\b[dD][rR]\b[.\s]*', 'Dr. ', text)
    return text
|
|
|
|
|
|
|
|
|
def cleaning_name(names):
    """Split entity strings into single-word name tokens and filter noise.

    Each entry of ``names`` is split on whitespace. A token is kept only if
    it is longer than two characters and its lower-cased form is not listed
    in ``STOP_WORDS``.

    Args:
        names: Iterable of entity strings (each may contain several words).

    Returns:
        A list of the surviving tokens, in input order. Duplicates are kept.
    """
    return [
        token
        for name in names
        for token in name.split()
        if len(token) > 2 and token.lower() not in STOP_WORDS
    ]
|
|
|
+
|
|
|
+
|
|
|
def extract_names(text):
    """Extract candidate staff-name tokens from a piece of review text.

    The text is normalised with ``clean_text`` and run through ``nlp``.
    Entities labelled ``PERSON`` or ``ORG`` are then broken into filtered
    single-word tokens by ``cleaning_name``.

    Args:
        text: Raw review text (``str``).

    Returns:
        A list of unique name tokens in order of first appearance.
    """
    doc = nlp(clean_text(text))
    # dict.fromkeys removes duplicates while keeping document order, so the
    # result is stable between runs (iterating a set of strings is not).
    entities = list(dict.fromkeys(
        ent.text for ent in doc.ents if ent.label_ in ('PERSON', 'ORG')
    ))
    # Splitting entities into words can re-introduce duplicates (for example
    # "John Smith" and "John"), so de-duplicate again at token level.
    return list(dict.fromkeys(cleaning_name(entities)))
|
|
|
+
|
|
|
+
|
|
|
def add_point_to_staff_profile(review):
    """Share one point equally among the staff names found in a review.

    Names are extracted from ``review.comment``; each name's share is
    ``1 / number_of_names``. Storing the points is not implemented yet
    (see the TODO below), so the function currently has no effect.

    Args:
        review: Object exposing a ``comment`` attribute holding the review
            text. A missing or empty comment is treated as having no names.

    Returns:
        None.
    """
    names = extract_names(review.comment or '')
    if not names:
        # No names found: nothing to award, and 1 / len(names) below would
        # raise ZeroDivisionError.
        return
    staffs = Staff.objects.all()
    point_unit = 1 / len(names)
    # TODO: store points to the staff profile
|