|
@@ -0,0 +1,40 @@
|
|
|
+from django.conf import settings
|
|
|
+from django.utils import timezone
|
|
|
+from review.models import Review
|
|
|
+
|
|
|
# Callable NLP model taken from the Django settings; the functions in this
# module call it on review comments and read ``.ents`` from the result.
# NOTE(review): presumably a loaded spaCy-style pipeline -- confirm in settings.
nlp = settings.MODEL

# Hand-labelled, lower-case tokens that are dropped from extracted names.
STOP_WORDS = [
    'signature',
    'care',
    'emergency',
    'er',
    'center',
    'nurse',
    'dr',
    'dr.',
    'signaturecare',
]
|
|
|
+
|
|
|
+
|
|
|
def extract_names_1st(location_id):
    '''
    First-pass name extraction over the current month's reviews of a location.

    Reviews are selected by ``location_id`` and a ``create_time`` on or after
    the first instant of the current month; reviews whose comment is ``None``
    are excluded. Each remaining comment is run through ``nlp`` and the text
    of every entity labelled ``PERSON`` or ``ORG`` is collected.

    :param location_id: value matched against ``Review.location_id``
    :return: list of entity texts in the order encountered (duplicates kept)
    '''
    # Month start is derived from timezone.now() exactly as returned
    # (no conversion to another time zone is applied here).
    month_start = timezone.now().replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    monthly_reviews = Review.objects.filter(
        location_id=location_id,
        create_time__gte=month_start,
    ).exclude(comment=None)

    wanted_labels = ['PERSON', 'ORG']
    found = []
    for review in monthly_reviews:
        parsed = nlp(review.comment)
        found.extend(
            entity.text for entity in parsed.ents if entity.label_ in wanted_labels
        )
    return found
|
|
|
+
|
|
|
+
|
|
|
def extract_names_2nd(names):
    '''
    Second-pass name extraction: tokenize names and drop stop words.

    Every name is split on whitespace. A token is kept, lower-cased, when it
    is longer than two characters and its lower-cased form is not listed in
    ``STOP_WORDS``.

    :param names: list of name strings
    :return: list of lower-cased tokens in input order (duplicates kept)
    '''
    kept = []
    for full_name in names:
        for token in full_name.split():
            lowered = token.lower()
            # Skip very short tokens and hand-labelled stop words.
            if len(token) <= 2 or lowered in STOP_WORDS:
                continue
            kept.append(lowered)
    return kept
|