nlu_utils.py

import re
import operator
import requests
from django.conf import settings
from requests import post
import json
from difflib import SequenceMatcher

from review.models import Review
from facebook_app.models import FacebookReview

nlu_server_url = getattr(settings, 'NLU_SERVER_URI')
ner_model_url = getattr(settings, 'NER_SERVER_URI')

def filter_with_last_ten_reviews(location_id, replies, platform='google'):
    replies = list(replies)
    if platform == 'google':
        revs = Review.objects.filter(location_id=location_id).exclude(reply=None).order_by('-update_time')[:10]
    elif platform == 'facebook':
        revs = FacebookReview.objects.filter(page__location_id=location_id).\
            exclude(facebookreviewreply=None).order_by('create_time')[:10]
    for r in revs:
        if platform == 'google':
            s1 = r.reply.replied_text
        elif platform == 'facebook':
            s1 = r.facebookreviewreply.replied_text
        # Iterate over a copy so removing items does not skip elements of `replies`.
        for rep in list(replies):
            s2 = rep.reply
            similarity = SequenceMatcher(None, s1, s2).ratio()
            if similarity > 0.7:
                replies.remove(rep)
                print('%.2f' % similarity, ' -------------- ', rep.reply_category)
    return replies
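
# Illustrative usage, a sketch only: `candidate_replies` is a hypothetical iterable of
# reply-suggestion objects exposing `.reply` text and `.reply_category`.
# filtered = filter_with_last_ten_reviews(location_id=42, replies=candidate_replies, platform='google')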

def clean_text(text):
    # Replace some characters in the text so the model performs better.
    text = re.sub(r':\s*', ' ', text)
    text = re.sub(r'&', ', ', text)
    text = re.sub(r'/', ', ', text)
    text = re.sub(r'\.*\n\.*', ', ', text)
    text = re.sub(r'[dD][rR](\.|\s*)*', 'Dr. ', text)
    emoji_pattern = re.compile(
        "["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
        "]+", flags=re.UNICODE)
    text = re.sub(emoji_pattern, ' ', text)
    return text
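
# Rough example of the effect (exact whitespace may differ slightly):
# clean_text('dr smith / great service 😊')  ->  roughly 'Dr. smith ,  great service'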

def model_inference(text):
    # url = nlu_server_url + '/model/parse'
    url = nlu_server_url + '/predict'
    text = clean_text(text)
    payload = {'text': text}
    headers = {'content-type': 'application/json'}
    response = post(url, data=json.dumps(payload), headers=headers)
    if response.status_code == 200:
        return response.json()
    return response
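
# On success this returns the decoded JSON; judging from analyze_model_inference below,
# the payload is expected to look roughly like {'response': {'intents': {'some_intent': '0.87', ...}}}.
# On a non-200 status the raw requests.Response object is returned instead.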

def is_a_name(name):
    '''
    Decide whether the given string is a person's name.
    :param name: a string, usually a reviewer name
    :return: a boolean, True/False
    '''
    url = ner_model_url + '/name'
    payload = {
        'name': name
    }
    headers = {'content-type': 'application/json'}
    res = requests.post(url, data=json.dumps(payload), headers=headers).json()
    return res.get('name')
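
# Sketch of the assumed contract with the NER service: it receives {'name': '<reviewer name>'}
# and answers with JSON whose 'name' key holds the True/False decision, e.g.
# is_a_name('John Smith')  ->  True   (hypothetical response)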

# This function is no longer used.
def analyze_inference(response):
    '''
    The response has four properties:
    ['intent', 'entities', 'intent_ranking', 'text']
    We keep every intent with more than 10% confidence;
    intents below that confidence are omitted.
    :param response: JSON -> the response returned by the RASA NLU server.
    :return: dict -> dictionary keyed by intent name with its confidence as the value.
    '''
    res_intents = response.get('intent_ranking')
    intents = {}
    for intent in res_intents:
        key = intent.get('name')
        value = intent.get('confidence')
        if value > 0.1:
            intents[key] = int(value * 100)
    return intents

def analyze_model_inference(response):
    res_intents = response.get('response').get('intents')
    sorted_dict = dict(sorted(res_intents.items(), key=operator.itemgetter(1), reverse=True))
    intents = {}
    for key, value in sorted_dict.items():
        value = float(value)
        if value > 0.1:
            intents[key] = int(value * 100)
    return intents
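
# Illustrative end-to-end usage, a sketch only (the review text and scores are hypothetical):
# result = model_inference('dr smith was great but the wait was long')
# if isinstance(result, dict):
#     intents = analyze_model_inference(result)   # e.g. {'doctor': 87, 'wait_time': 23}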