5 年前 · 58fe82948a
--- a/nlu_job/name_extraction.py
+++ b/nlu_job/name_extraction.py
@@ -0,0 +1,40 @@
 
				+from django.conf import settings
			
 
				+from django.utils import timezone
			
 
				+from review.models import Review
			
 
				+
			
 
				+nlp = settings.MODEL
			
 
				+STOP_WORDS = ['signature', 'care', 'emergency', 'er', 'center', 'nurse', 'dr', 'dr.', 'signaturecare']
			
 
				+
			
 
				+
			
 
				+def extract_names_1st(location_id):
			
 
				+    '''
			
 
				+    Extract all names that presented in this month's all reviews
			
 
				+    :param location_id: string -> ID of the  location which you want to collect reviews
			
 
				+    :return: names: list -> A list of names that found in reviews
			
 
				+    '''
			
 
				+    reviews = Review.objects.filter(
			
 
				+        location_id=location_id,
			
 
				+        create_time__gte=timezone.now().replace(day=1, hour=0, minute=0, second=0, microsecond=0)
			
 
				+    )\
			
 
				+        .exclude(comment=None)
			
 
				+    names = []
			
 
				+    for r in reviews:
			
 
				+        doc = nlp(r.comment)
			
 
				+        for e in doc.ents:
			
 
				+            names.append(e.text) if e.label_ in ['PERSON', 'ORG'] else None
			
 
				+    return names
			
 
				+
			
 
				+
			
 
				+def extract_names_2nd(names):
			
 
				+    '''
			
 
				+    2nd order names extraction.
			
 
				+    removing all hand labeled stop words
			
 
				+    :param names: List -> list of name.
			
 
				+    :return: List -> list of name.
			
 
				+    '''
			
 
				+    new_names = []
			
 
				+    for name in names:
			
 
				+        for n in name.split():
			
 
				+            if n.lower() not in STOP_WORDS and len(n) > 2:
			
 
				+                new_names.append(n.lower())
			
 
				+    return new_names