Ver código fonte

clean a little bit and update requirements.txt

Mohidul Islam 5 anos atrás
pai
commit
8a98e950f9

+ 0 - 5
name_extractor/utils.py

@@ -4,8 +4,6 @@ from django.conf import settings
 from .models import Staff
 from nlu_job.nlu_utils import model_inference
 
-nlp = settings.MODEL
-
 nlu_url = settings.NLU_SERVER_URI
 
 
@@ -36,9 +34,6 @@ def clean_text(text):
 
 def extract_names(review):
     text = clean_text(review.comment)
-    # doc = nlp(text)
-    # names = {ent.text for ent in doc.ents if ent.label_ in ['PERSON', 'ORG']}
-
     names = get_all_names(text)
     cleaned_names = []
     for name in names:

+ 0 - 40
nlu_job/name_extraction.py

@@ -1,40 +0,0 @@
-from django.conf import settings
-from django.utils import timezone
-from review.models import Review
-
-nlp = settings.MODEL
-STOP_WORDS = ['signature', 'care', 'emergency', 'er', 'center', 'nurse', 'dr', 'dr.', 'signaturecare']
-
-
-def extract_names_1st(location_id):
-    '''
-    Extract all names that appear in this month's reviews for a location.
-    :param location_id: string -> ID of the location whose reviews should be collected
-    :return: names: list -> A list of names found in the reviews
-    '''
-    reviews = Review.objects.filter(
-        location_id=location_id,
-        create_time__gte=timezone.now().replace(day=1, hour=0, minute=0, second=0, microsecond=0)
-    )\
-        .exclude(comment=None)
-    names = []
-    for r in reviews:
-        doc = nlp(r.comment)
-        for e in doc.ents:
-            names.append(e.text) if e.label_ in ['PERSON', 'ORG'] else None
-    return names
-
-
-def extract_names_2nd(names):
-    '''
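-    Second-pass name extraction.
-    Splits each name into words, drops hand-labeled stop words and words of
-    two characters or fewer, and lowercases what remains.
-    :param names: List -> list of names.
-    :return: List -> list of lowercased name words.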
-    '''
-    new_names = []
-    for name in names:
-        for n in name.split():
-            if n.lower() not in STOP_WORDS and len(n) > 2:
-                new_names.append(n.lower())
-    return new_names

+ 0 - 7
nlu_job/nlu_utils.py

@@ -6,7 +6,6 @@ from difflib import SequenceMatcher
 from review.models import Review
 
 nlu_server_url = settings.NLU_SERVER_URI
-ner_model = settings.MODEL
 
 
 def filter_with_last_ten_reviews(location_id, replies):
@@ -90,9 +89,3 @@ def analyze_inference(response):
             intents[key] = int(values*100)
 
     return intents
-
-
-def name_entity_recognition(text):
-    doc = ner_model(text)
-    names = [n for n in doc.ents if n.label_ == 'PERSON']
-    return names

+ 10 - 26
requirements.txt

@@ -1,38 +1,22 @@
 asgiref==3.2.3
-blis==0.4.1
-cachetools==3.1.1
-catalogue==1.0.0
+cachetools==4.0.0
 certifi==2019.11.28
 chardet==3.0.4
-cymem==2.0.3
-Django==3.0
-django-crispy-forms==1.8.1
+Django==3.0.4
+django-crispy-forms==1.9.0
 django-crontab==0.7.1
 djangorestframework==3.11.0
-en-core-web-md==2.2.5
-google-auth==1.8.2
+google-auth==1.11.2
 google-auth-oauthlib==0.4.1
-idna==2.8
-importlib-metadata==1.5.0
-murmurhash==1.0.2
+idna==2.9
 mysqlclient==1.4.6
-nameparser==1.0.6
-numpy==1.18.1
 oauthlib==3.1.0
-plac==1.1.3
-preshed==3.0.2
 pyasn1==0.4.8
-pyasn1-modules==0.2.7
+pyasn1-modules==0.2.8
 pytz==2019.3
-requests==2.22.0
+requests==2.23.0
 requests-oauthlib==1.3.0
 rsa==4.0
-six==1.13.0
-spacy==2.2.3
-sqlparse==0.3.0
-srsly==1.0.1
-thinc==7.3.1
-tqdm==4.42.0
-urllib3==1.25.7
-wasabi==0.6.0
-zipp==2.1.0
+six==1.14.0
+sqlparse==0.3.1
+urllib3==1.25.8

+ 0 - 7
review_automation/settings.py

@@ -1,6 +1,4 @@
 import os
-import spacy
-import en_core_web_md
 
 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -135,11 +133,6 @@ CRONJOBS = [
     ('0 6 * * *', 'review.background_job.background_task'),
 ]
 
-# spaCy model
-
-# MODEL = spacy.load('en_core_web_md')
-MODEL = 'NLP NER MODEL'
-
 
 # Configure Email Server
 EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend'