# model_pred_report.py
import csv
import json
from datetime import timedelta
from difflib import SequenceMatcher

import requests
from django.conf import settings
from django.utils import timezone

from facebook_app.models import FacebookReview
from nlu_job.nlu_utils import clean_text

from .models import Review, CustomReply
  10. # constants
  11. nlu_server_url = settings.NLU_SERVER_URI
  12. replies = CustomReply.objects.all()
  13. def model_inference(text):
  14. url = nlu_server_url + '/predict'
  15. text = clean_text(text)
  16. payload = {'text': text}
  17. headers = {'content-type': 'application/json'}
  18. response = requests.post(url, data=json.dumps(payload), headers=headers)
  19. if response.status_code == 200:
  20. res = response.json()
  21. res_intents = res.get('response').get('intents')
  22. # print(res_intents)
  23. intents = []
  24. for k, v in res_intents.items():
  25. if float(v) >= 0.3:
  26. intents.append(k)
  27. return intents
  28. def get_review_actual_intent(reply):
  29. for c_r in replies:
  30. replied_text = c_r.reply
  31. similarity = SequenceMatcher(None, reply, replied_text).ratio()
  32. if similarity > 0.7:
  33. return c_r.reply_category
  34. return None
  35. def do_predict_correctly(review, actual_intent):
  36. intents = model_inference(review.comment)
  37. return 1 if actual_intent in intents else 0
  38. def get_review_report_of_nth_days(days):
  39. date = timezone.now() - timezone.timedelta(days=days)
  40. reviews = Review.objects.filter(create_time__gte=date, star_rating__gte=4).exclude(comment=None).exclude(reply=None)
  41. # Write report into a csv
  42. with open(f'{days}_day_review_report.csv', 'w') as file:
  43. header_row = ['review', 'reply', 'model_inference', 'actual_class', 'classified']
  44. writer = csv.writer(file)
  45. writer.writerow(header_row)
  46. for review in reviews:
  47. review_text = review.comment
  48. reply = review.reply.replied_text
  49. model_pred = model_inference(review_text)
  50. if model_pred:
  51. model_pred = model_pred[0]
  52. actual_class = get_review_actual_intent(reply)
  53. classified = do_predict_correctly(review, actual_class)
  54. row = [review_text, reply, model_pred, actual_class, classified]
  55. writer.writerow(row)
  56. def making_dataset():
  57. google_reviews = Review.objects.all().exclude(comment=None).exclude(reply=None)
  58. facebook_reviews = FacebookReview.objects.all().exclude(facebookreviewreply=None).exclude(review_text=None)
  59. with open('review-dataset.csv', 'w') as file:
  60. header_row = ['review', 'reply', 'class', 'platform']
  61. writer = csv.writer(file)
  62. writer.writerow(header_row)
  63. for review in google_reviews:
  64. review_text = review.comment
  65. reply_text = review.reply.replied_text
  66. label = get_review_actual_intent(reply_text)
  67. writer.writerow([review_text, reply_text, label, 'google'])
  68. for rev in facebook_reviews:
  69. review_text = rev.review_text
  70. reply_text = rev.facebookreviewreply.replied_text
  71. label = get_review_actual_intent(reply_text)
  72. writer.writerow([review_text, reply_text, label, 'facebook'])