nlu_utils.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. import re
  2. import operator
  3. from django.conf import settings
  4. from requests import post
  5. import json
  6. from difflib import SequenceMatcher
  7. from review.models import Review
  8. nlu_server_url = settings.NLU_SERVER_URI
  9. def filter_with_last_ten_reviews(location_id, replies):
  10. replies = list(replies)
  11. revs = Review.objects.filter(location_id=location_id).exclude(reply=None).order_by('-update_time')[:10]
  12. for r in revs:
  13. s1 = r.reply.replied_text
  14. for rep in replies:
  15. s2 = rep.reply
  16. similarity = SequenceMatcher(None, s1, s2).ratio()
  17. if similarity > 0.7:
  18. replies.remove(rep)
  19. print('%.2f'%similarity, ' -------------- ', rep.reply_category)
  20. return replies
  21. def clean_text(text):
  22. # replace some letter in text for getting better performance
  23. text = re.sub(r':\s*', ' ', text)
  24. text = re.sub(r'&', ', ', text)
  25. text = re.sub(r'/', ', ', text)
  26. text = re.sub(r'\.*\n\.*', ', ', text)
  27. text = re.sub(r'[dD][rR](\.|\s*)*', 'Dr. ', text)
  28. emoji_pattern = re.compile("["
  29. u"\U0001F600-\U0001F64F" # emoticons
  30. u"\U0001F300-\U0001F5FF" # symbols & pictographs
  31. u"\U0001F680-\U0001F6FF" # transport & map symbols
  32. u"\U0001F1E0-\U0001F1FF" # flags (iOS)
  33. "]+", flags=re.UNICODE)
  34. text = re.sub(emoji_pattern, ' ', text)
  35. return text
  36. def model_inference(text):
  37. # url = nlu_server_url + '/model/parse'
  38. url = nlu_server_url + '/predict'
  39. text = clean_text(text)
  40. payload = {'text': text}
  41. headers = {'content-type': 'application/json'}
  42. response = post(url, data=json.dumps(payload), headers=headers)
  43. if response.status_code == 200:
  44. return response.json()
  45. return response
  46. def is_a_name(name):
  47. '''
  48. function that decide whether it is a person name or not
  49. :param -> a string usually reviewer name:
  50. :return -> a boolean True/False:
  51. '''
  52. response = model_inference(name.title())
  53. entities = response.get('entities')
  54. if not entities:
  55. return False
  56. entity = entities[0]
  57. if entity.get('entity') == 'PERSON':
  58. return True
  59. else:
  60. return False
  61. # This function will not use anymore
  62. def analyze_inference(response):
  63. '''
  64. response has four property
  65. ['intent', 'entities', 'intent_ranking', 'text']
  66. we took all intents that has more than 10% of intent confident.
  67. all the intents that has bellow confidence has been omitted.
  68. :param response: JSON -> a json response that RASA NLU server respond.
  69. :return: DICT ->dictionary with key of intent and value of it's confident.
  70. '''
  71. res_intents = response.get('intent_ranking')
  72. intents = {}
  73. for intent in res_intents:
  74. key = intent.get('name')
  75. values = intent.get('confidence')
  76. if values > 0.1:
  77. intents[key] = int(values*100)
  78. return intents
  79. def analyze_model_inference(response):
  80. res_intents = response.get('response').get('intents')
  81. sorted_dict = dict(sorted(res_intents.items(), key=operator.itemgetter(1), reverse=True))
  82. intents = {}
  83. for key, value in sorted_dict.items():
  84. value = float(value)
  85. if value > 0.1:
  86. intents[key] = int(value*100)
  87. return intents