# nlu_utils.py
  1. import re
  2. import operator
  3. from django.conf import settings
  4. from requests import post
  5. import json
  6. from difflib import SequenceMatcher
  7. from review.models import Review
  8. from facebook_app.models import FacebookReview
  9. nlu_server_url = getattr(settings, 'NLU_SERVER_URI')
  10. ner_model = getattr(settings, 'SPACY_NER_MODEL')
  11. def filter_with_last_ten_reviews(location_id, replies, platform='google'):
  12. replies = list(replies)
  13. if platform == 'google':
  14. revs = Review.objects.filter(location_id=location_id).exclude(reply=None).order_by('-update_time')[:10]
  15. elif platform == 'facebook':
  16. revs = FacebookReview.objects.filter(page__location_id=location_id).\
  17. exclude(facebookreviewreply=None).order_by('create_time')[:10]
  18. for r in revs:
  19. if platform == 'google':
  20. s1 = r.reply.replied_text
  21. elif platform == 'facebook':
  22. s1 = r.facebookreviewreply.replied_text
  23. for rep in replies:
  24. s2 = rep.reply
  25. similarity = SequenceMatcher(None, s1, s2).ratio()
  26. if similarity > 0.7:
  27. replies.remove(rep)
  28. print('%.2f'%similarity, ' -------------- ', rep.reply_category)
  29. return replies
  30. def clean_text(text):
  31. # replace some letter in text for getting better performance
  32. text = re.sub(r':\s*', ' ', text)
  33. text = re.sub(r'&', ', ', text)
  34. text = re.sub(r'/', ', ', text)
  35. text = re.sub(r'\.*\n\.*', ', ', text)
  36. text = re.sub(r'[dD][rR](\.|\s*)*', 'Dr. ', text)
  37. emoji_pattern = re.compile("["
  38. u"\U0001F600-\U0001F64F" # emoticons
  39. u"\U0001F300-\U0001F5FF" # symbols & pictographs
  40. u"\U0001F680-\U0001F6FF" # transport & map symbols
  41. u"\U0001F1E0-\U0001F1FF" # flags (iOS)
  42. "]+", flags=re.UNICODE)
  43. text = re.sub(emoji_pattern, ' ', text)
  44. return text
  45. def model_inference(text):
  46. # url = nlu_server_url + '/model/parse'
  47. url = nlu_server_url + '/predict'
  48. text = clean_text(text)
  49. payload = {'text': text}
  50. headers = {'content-type': 'application/json'}
  51. response = post(url, data=json.dumps(payload), headers=headers)
  52. if response.status_code == 200:
  53. return response.json()
  54. return response
  55. def is_a_name(name):
  56. '''
  57. function that decide whether it is a person name or not
  58. :param -> a string usually reviewer name:
  59. :return -> a boolean True/False:
  60. '''
  61. doc = ner_model(name)
  62. for ent in doc.ents:
  63. if ent.label_ in ['PERSON']:
  64. return True
  65. return False
  66. # This function will not use anymore
  67. def analyze_inference(response):
  68. '''
  69. response has four property
  70. ['intent', 'entities', 'intent_ranking', 'text']
  71. we took all intents that has more than 10% of intent confident.
  72. all the intents that has bellow confidence has been omitted.
  73. :param response: JSON -> a json response that RASA NLU server respond.
  74. :return: DICT ->dictionary with key of intent and value of it's confident.
  75. '''
  76. res_intents = response.get('intent_ranking')
  77. intents = {}
  78. for intent in res_intents:
  79. key = intent.get('name')
  80. values = intent.get('confidence')
  81. if values > 0.1:
  82. intents[key] = int(values*100)
  83. return intents
  84. def analyze_model_inference(response):
  85. res_intents = response.get('response').get('intents')
  86. sorted_dict = dict(sorted(res_intents.items(), key=operator.itemgetter(1), reverse=True))
  87. intents = {}
  88. for key, value in sorted_dict.items():
  89. value = float(value)
  90. if value > 0.1:
  91. intents[key] = int(value*100)
  92. return intents