Parcourir la source

Fix some errors in the model report generator and add a dataset-making command.

Mohidul Islam il y a 4 ans
Parent
commit
cc79f6878f
3 fichiers modifiés avec 44 ajouts et 8 suppressions
  1. 4 0
      .gitignore
  2. 11 0
      review/management/commands/make_dataset.py
  3. 29 8
      review/model_pred_report.py

+ 4 - 0
.gitignore

@@ -6,6 +6,10 @@ __pycache__/
 .idea
 review_data_set.csv
 # C extensions
+*.csv
+geckodriver.log
+*.out
+
 *.so
 client_secrets.json
 # Distribution / packaging

+ 11 - 0
review/management/commands/make_dataset.py

@@ -0,0 +1,11 @@
+from django.core.management.base import BaseCommand
+from review.model_pred_report import making_dataset
+
+
+class Command(BaseCommand):
+
+    help = 'Making dataset with all facebook and google reviews.'
+
+    def handle(self, *args, **options):
+        making_dataset()
+        self.stdout.write(self.style.SUCCESS(f'Dataset with facebook and google review has been exported.'))

+ 29 - 8
review/model_pred_report.py

@@ -7,6 +7,7 @@ from django.conf import settings
 from nlu_job.nlu_utils import clean_text
 
 from .models import Review, CustomReply
+from facebook_app.models import FacebookReview
 
 
 # constants
@@ -23,18 +24,18 @@ def model_inference(text):
     if response.status_code == 200:
         res = response.json()
     res_intents = res.get('response').get('intents')
+    # print(res_intents)
     intents = []
-    for k, v in res_intents:
-        if v <= 0.3:
+    for k, v in res_intents.items():
+        if float(v) >= 0.3:
             intents.append(k)
     return intents
 
 
-def get_review_actual_intent(review):
-    actual_reply = review.reply.replied_text
+def get_review_actual_intent(reply):
     for c_r in replies:
         replied_text = c_r.reply
-        similarity = SequenceMatcher(None, actual_reply, replied_text).ratio()
+        similarity = SequenceMatcher(None, reply, replied_text).ratio()
         if similarity > 0.7:
             return c_r.reply_category
     return None
@@ -47,10 +48,10 @@ def do_predict_correctly(review, actual_intent):
 
 def get_review_report_of_nth_days(days):
     date = timezone.now() - timezone.timedelta(days=days)
-    reviews = Review.objects.filter(create_time__gte=date, star_rating__gte=4).exclude(comment=None)
+    reviews = Review.objects.filter(create_time__gte=date, star_rating__gte=4).exclude(comment=None).exclude(reply=None)
 
     # Write report into a csv
-    with open('review_report.csv', 'w') as file:
+    with open(f'{days}_day_review_report.csv', 'w') as file:
         header_row = ['review', 'reply', 'model_inference', 'actual_class', 'classified']
         writer = csv.writer(file)
         writer.writerow(header_row)
@@ -58,7 +59,27 @@ def get_review_report_of_nth_days(days):
             review_text = review.comment
             reply = review.reply.replied_text
             model_pred = model_inference(review_text)
-            actual_class = get_review_actual_intent(review)
+            actual_class = get_review_actual_intent(reply)
             classified = do_predict_correctly(review, actual_class)
             row = [review_text, reply, model_pred, actual_class, classified]
             writer.writerow(row)
+
+
+def making_dataset():
+    google_reviews = Review.objects.all().exclude(comment=None).exclude(reply=None)
+    facebook_reviews = FacebookReview.objects.all().exclude(facebookreviewreply=None).exclude(review_text=None)
+    with open('review-dataset.csv', 'w') as file:
+        header_row = ['review', 'reply', 'class', 'platform']
+        writer = csv.writer(file)
+        writer.writerow(header_row)
+        for review in google_reviews:
+            review_text = review.comment
+            reply_text = review.reply.replied_text
+            label = get_review_actual_intent(reply_text)
+            writer.writerow([review_text, reply_text, label, 'google'])
+
+        for rev in facebook_reviews:
+            review_text = rev.review_text
+            reply_text = rev.facebookreviewreply.replied_text
+            label = get_review_actual_intent(reply_text)
+            writer.writerow([review_text, reply_text, label, 'facebook'])