nlu_utils.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. import re
  2. from django.conf import settings
  3. from requests import post
  4. import json
  5. from difflib import SequenceMatcher
  6. from review.models import Review
  7. nlu_server_url = settings.NLU_SERVER_URI
  8. ner_model = settings.MODEL
  9. def filter_with_last_ten_reviews(location_id, replies):
  10. replies = list(replies)
  11. revs = Review.objects.filter(location_id=location_id).exclude(reply=None).order_by('-update_time')[:10]
  12. for r in revs:
  13. s1 = r.reply.replied_text
  14. for rep in replies:
  15. s2 = rep.reply
  16. similarity = SequenceMatcher(None, s1, s2).ratio()
  17. if similarity > 0.7:
  18. replies.remove(rep)
  19. print('%.2f'%similarity, ' -------------- ', rep.reply_category)
  20. return replies
  21. def clean_text(text):
  22. # replace some letter in text for getting better performance
  23. text = re.sub(r':\s*', ' ', text)
  24. text = re.sub(r'&', ', ', text)
  25. text = re.sub(r'/', ', ', text)
  26. text = re.sub(r'\.*\n\.*', ', ', text)
  27. text = re.sub(r'[dD][rR](\.|\s*)*', 'Dr. ', text)
  28. emoji_pattern = re.compile("["
  29. u"\U0001F600-\U0001F64F" # emoticons
  30. u"\U0001F300-\U0001F5FF" # symbols & pictographs
  31. u"\U0001F680-\U0001F6FF" # transport & map symbols
  32. u"\U0001F1E0-\U0001F1FF" # flags (iOS)
  33. "]+", flags=re.UNICODE)
  34. text = re.sub(emoji_pattern, ' ', text)
  35. return text
  36. def model_inference(text):
  37. url = nlu_server_url + '/model/parse'
  38. text = clean_text(text)
  39. payload = {'text': text}
  40. headers = {'content-type': 'application/json'}
  41. response = post(url, data=json.dumps(payload), headers=headers)
  42. if response.status_code == 200:
  43. return response.json()
  44. return response
  45. def is_a_name(name):
  46. '''
  47. function that decide whether it is a person name or not
  48. :param -> a string usually reviewer name:
  49. :return -> a boolean True/False:
  50. '''
  51. response = model_inference(name.title())
  52. entities = response.get('entities')
  53. if not entities:
  54. return False
  55. entity = entities[0]
  56. if entity.get('entity') == 'PERSON':
  57. return True
  58. else:
  59. return False
  60. def analyze_inference(response):
  61. '''
  62. response has four property
  63. ['intent', 'entities', 'intent_ranking', 'text']
  64. we took all intents that has more than 10% of intent confident.
  65. all the intents that has bellow confidence has been omitted.
  66. :param response: JSON -> a json response that RASA NLU server respond.
  67. :return: DICT ->dictionary with key of intent and value of it's confident.
  68. '''
  69. res_intents = response.get('intent_ranking')
  70. intents = {}
  71. for intent in res_intents:
  72. key = intent.get('name')
  73. values = intent.get('confidence')
  74. if values > 0.1:
  75. intents[key] = int(values*100)
  76. return intents
  77. def name_entity_recognition(text):
  78. doc = ner_model(text)
  79. names = [n for n in doc.ents if n.label_ == 'PERSON']
  80. return names