import csv
import json
from datetime import timedelta
from difflib import SequenceMatcher

import requests
from django.conf import settings
from django.utils import timezone

from facebook_app.models import FacebookReview
from nlu_job.nlu_utils import clean_text

from .models import Review, CustomReply

# constants
nlu_server_url = settings.NLU_SERVER_URI

# Lazy queryset shared by every call of get_review_actual_intent().
# NOTE(review): Django caches a queryset's rows once it has been iterated, so
# CustomReply rows created afterwards are presumably not seen until the
# process restarts -- confirm this is acceptable for the callers.
replies = CustomReply.objects.all()

# Minimum score (inclusive) for an intent to count as predicted.
INTENT_SCORE_THRESHOLD = 0.3
# Minimum SequenceMatcher ratio (exclusive) for a reply to match a template.
REPLY_SIMILARITY_THRESHOLD = 0.7
# Seconds to wait for the NLU server before giving up; without a timeout
# ``requests`` may block forever.  TODO(review): confirm a suitable value.
NLU_REQUEST_TIMEOUT = 30


def model_inference(text, threshold=INTENT_SCORE_THRESHOLD):
    """Ask the NLU server which intents apply to *text*.

    The text is passed through ``clean_text`` and POSTed as JSON to the
    ``/predict`` endpoint of ``settings.NLU_SERVER_URI``.

    Args:
        text: Raw review text.
        threshold: Minimum score (inclusive) an intent needs to be returned.

    Returns:
        A list of the intent names whose score is at least *threshold*, in
        the order they appear in the server response, or ``None`` when the
        server does not answer with HTTP 200.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = nlu_server_url + '/predict'
    payload = {'text': clean_text(text)}
    headers = {'content-type': 'application/json'}
    response = requests.post(
        url,
        data=json.dumps(payload),
        headers=headers,
        timeout=NLU_REQUEST_TIMEOUT,
    )
    if response.status_code != 200:
        # Kept as ``None`` (not ``[]``) so callers can tell a failed request
        # apart from "no intent reached the threshold".
        return None

    body = response.json()
    # Tolerate a missing/null 'response' or 'intents' key instead of raising
    # AttributeError on the chained ``.get``.
    scores = (body.get('response') or {}).get('intents') or {}
    return [
        intent for intent, score in scores.items()
        if float(score) >= threshold
    ]


def get_review_actual_intent(reply, threshold=REPLY_SIMILARITY_THRESHOLD):
    """Infer the intent of a review from the reply that was sent for it.

    The reply is compared with every ``CustomReply`` template; the category
    of the first template whose similarity ratio exceeds *threshold* is
    returned.

    Args:
        reply: Text of the reply that was posted, or ``None``.
        threshold: Similarity ratio (exclusive) a template must exceed.

    Returns:
        The ``reply_category`` of the first matching template, or ``None``
        when *reply* is ``None`` or no template is similar enough.
    """
    if reply is None:
        # SequenceMatcher cannot compare None; there is nothing to match.
        return None
    for template in replies:
        # Argument order is kept as-is: ratio() is not guaranteed to be
        # symmetric in its two sequences.
        similarity = SequenceMatcher(None, reply, template.reply).ratio()
        if similarity > threshold:
            return template.reply_category
    return None


def do_predict_correctly(review, actual_intent):
    """Return 1 if the model predicts *actual_intent* for *review*, else 0.

    Args:
        review: Object whose ``comment`` attribute holds the review text.
        actual_intent: Intent label to look for among the predictions.

    Returns:
        ``1`` when *actual_intent* is among the predicted intents, ``0``
        otherwise (including when the NLU server request did not succeed).
    """
    # model_inference() returns None on a non-200 response; treat that as
    # "nothing predicted" rather than raising TypeError on ``in None``.
    intents = model_inference(review.comment) or []
    return 1 if actual_intent in intents else 0


def _write_csv(path, header, rows):
    """Write *header* and then every row from *rows* to the CSV at *path*."""
    # newline='' is what the csv module requires so that line endings and
    # embedded newlines are written correctly; utf-8 keeps non-ASCII review
    # text from failing on platforms with a narrower default encoding.
    with open(path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        writer.writerows(rows)


def _report_rows(reviews):
    """Yield one report row per review, querying the model once per review."""
    for review in reviews:
        review_text = review.comment
        reply = review.reply.replied_text
        predicted = model_inference(review_text)
        # Reuse the prediction above instead of calling the NLU server a
        # second time through do_predict_correctly().
        actual_class = get_review_actual_intent(reply)
        classified = 1 if actual_class in (predicted or []) else 0
        # Report only the first predicted intent.  An empty list / None is
        # written unchanged ('[]' / '' in the CSV), which keeps "no intent
        # above threshold" distinguishable from a failed request.
        model_pred = predicted[0] if predicted else predicted
        yield [review_text, reply, model_pred, actual_class, classified]


def get_review_report_of_nth_days(days):
    """Write a CSV comparing model predictions with reply-derived labels.

    Covers reviews created within the last *days* days that have a star
    rating of at least 4, a comment and a reply.  The report is written to
    ``<days>_day_review_report.csv`` in the current working directory.

    Args:
        days: Size of the look-back window in days.
    """
    date = timezone.now() - timedelta(days=days)
    reviews = (
        Review.objects
        .filter(create_time__gte=date, star_rating__gte=4)
        .exclude(comment=None)
        .exclude(reply=None)
    )
    header_row = ['review', 'reply', 'model_inference', 'actual_class', 'classified']
    # Write report into a csv
    _write_csv(f'{days}_day_review_report.csv', header_row, _report_rows(reviews))


def _dataset_rows(google_reviews, facebook_reviews):
    """Yield labelled dataset rows for the Google, then the Facebook reviews."""
    for review in google_reviews:
        reply_text = review.reply.replied_text
        label = get_review_actual_intent(reply_text)
        yield [review.comment, reply_text, label, 'google']
    for review in facebook_reviews:
        reply_text = review.facebookreviewreply.replied_text
        label = get_review_actual_intent(reply_text)
        yield [review.review_text, reply_text, label, 'facebook']


def making_dataset():
    """Export all replied-to reviews as a labelled CSV dataset.

    Every Google ``Review`` with a comment and a reply, and every
    ``FacebookReview`` with review text and a reply, is written to
    ``review-dataset.csv`` in the current working directory.  The ``class``
    column is the label derived from the reply via
    ``get_review_actual_intent``.
    """
    google_reviews = Review.objects.exclude(comment=None).exclude(reply=None)
    facebook_reviews = (
        FacebookReview.objects
        .exclude(facebookreviewreply=None)
        .exclude(review_text=None)
    )
    header_row = ['review', 'reply', 'class', 'platform']
    _write_csv(
        'review-dataset.csv',
        header_row,
        _dataset_rows(google_reviews, facebook_reviews),
    )