nlu_utils.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. import re
  2. import operator
  3. from django.conf import settings
  4. from requests import post
  5. import json
  6. from difflib import SequenceMatcher
  7. from review.models import Review
  8. nlu_server_url = getattr(settings, 'NLU_SERVER_URI')
  9. ner_model = getattr(settings, 'SPACY_NER_MODEL')
  10. def filter_with_last_ten_reviews(location_id, replies):
  11. replies = list(replies)
  12. revs = Review.objects.filter(location_id=location_id).exclude(reply=None).order_by('-update_time')[:10]
  13. for r in revs:
  14. s1 = r.reply.replied_text
  15. for rep in replies:
  16. s2 = rep.reply
  17. similarity = SequenceMatcher(None, s1, s2).ratio()
  18. if similarity > 0.7:
  19. replies.remove(rep)
  20. print('%.2f'%similarity, ' -------------- ', rep.reply_category)
  21. return replies
  22. def clean_text(text):
  23. # replace some letter in text for getting better performance
  24. text = re.sub(r':\s*', ' ', text)
  25. text = re.sub(r'&', ', ', text)
  26. text = re.sub(r'/', ', ', text)
  27. text = re.sub(r'\.*\n\.*', ', ', text)
  28. text = re.sub(r'[dD][rR](\.|\s*)*', 'Dr. ', text)
  29. emoji_pattern = re.compile("["
  30. u"\U0001F600-\U0001F64F" # emoticons
  31. u"\U0001F300-\U0001F5FF" # symbols & pictographs
  32. u"\U0001F680-\U0001F6FF" # transport & map symbols
  33. u"\U0001F1E0-\U0001F1FF" # flags (iOS)
  34. "]+", flags=re.UNICODE)
  35. text = re.sub(emoji_pattern, ' ', text)
  36. return text
  37. def model_inference(text):
  38. # url = nlu_server_url + '/model/parse'
  39. url = nlu_server_url + '/predict'
  40. text = clean_text(text)
  41. payload = {'text': text}
  42. headers = {'content-type': 'application/json'}
  43. response = post(url, data=json.dumps(payload), headers=headers)
  44. if response.status_code == 200:
  45. return response.json()
  46. return response
  47. def is_a_name(name):
  48. '''
  49. function that decide whether it is a person name or not
  50. :param -> a string usually reviewer name:
  51. :return -> a boolean True/False:
  52. '''
  53. doc = ner_model(name)
  54. for ent in doc.ents:
  55. if ent.label_ in ['PERSON']:
  56. return True
  57. return False
# NOTE: this function is no longer used.
  59. def analyze_inference(response):
  60. '''
  61. response has four property
  62. ['intent', 'entities', 'intent_ranking', 'text']
  63. we took all intents that has more than 10% of intent confident.
  64. all the intents that has bellow confidence has been omitted.
  65. :param response: JSON -> a json response that RASA NLU server respond.
  66. :return: DICT ->dictionary with key of intent and value of it's confident.
  67. '''
  68. res_intents = response.get('intent_ranking')
  69. intents = {}
  70. for intent in res_intents:
  71. key = intent.get('name')
  72. values = intent.get('confidence')
  73. if values > 0.1:
  74. intents[key] = int(values*100)
  75. return intents
  76. def analyze_model_inference(response):
  77. res_intents = response.get('response').get('intents')
  78. sorted_dict = dict(sorted(res_intents.items(), key=operator.itemgetter(1), reverse=True))
  79. intents = {}
  80. for key, value in sorted_dict.items():
  81. value = float(value)
  82. if value > 0.1:
  83. intents[key] = int(value*100)
  84. return intents