import re
import operator
import json
from difflib import SequenceMatcher

import requests
from django.conf import settings

from review.models import Review
from facebook_app.models import FacebookReview

nlu_server_url = getattr(settings, 'NLU_SERVER_URI')
ner_model_url = getattr(settings, 'NER_SERVER_URI')
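
# Illustrative only: the two service endpoints above are read from Django
# settings. A settings.py fragment might look like the following (hostnames
# and ports are placeholders, not the real deployment values):
#
#     NLU_SERVER_URI = 'http://localhost:5005'
#     NER_SERVER_URI = 'http://localhost:5006'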


def filter_with_last_ten_reviews(location_id, replies, platform='google'):
    """
    Drop candidate replies whose text is more than 70% similar to any of the
    last ten replies already posted for this location.
    """
    replies = list(replies)
    if platform == 'google':
        revs = Review.objects.filter(location_id=location_id).\
            exclude(reply=None).order_by('-update_time')[:10]
    elif platform == 'facebook':
        revs = FacebookReview.objects.filter(page__location_id=location_id).\
            exclude(facebookreviewreply=None).order_by('-create_time')[:10]
    else:
        return replies
    for r in revs:
        if platform == 'google':
            s1 = r.reply.replied_text
        else:
            s1 = r.facebookreviewreply.replied_text
        # Iterate over a copy so that removing an item does not skip elements.
        for rep in list(replies):
            s2 = rep.reply
            similarity = SequenceMatcher(None, s1, s2).ratio()
            if similarity > 0.7:
                replies.remove(rep)
                print('%.2f' % similarity, ' -------------- ', rep.reply_category)
    return replies
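

# Illustrative sketch (assumption, not part of the original module): the
# de-duplication step is meant to run on freshly generated reply candidates
# before one of them is sent. `candidate_replies` is a hypothetical iterable
# of objects exposing `.reply` and `.reply_category`.
def _example_pick_reply(location_id, candidate_replies):
    fresh = filter_with_last_ten_reviews(location_id, candidate_replies, platform='google')
    # Fall back to the unfiltered list if everything was judged too similar.
    return fresh[0] if fresh else list(candidate_replies)[0]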


def clean_text(text):
    # Normalise a few characters and patterns so the NLU model performs better.
    text = re.sub(r':\s*', ' ', text)       # drop colons
    text = re.sub(r'&', ', ', text)         # '&' becomes a separator
    text = re.sub(r'/', ', ', text)         # slashes become separators
    text = re.sub(r'\.*\n\.*', ', ', text)  # line breaks become separators
    # Normalise 'dr', 'DR', 'dr.' etc. to 'Dr. ' (the word boundary avoids
    # false matches inside words such as 'drive').
    text = re.sub(r'\b[dD][rR]\b\.?\s*', 'Dr. ', text)
    emoji_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               "]+", flags=re.UNICODE)
    text = emoji_pattern.sub(' ', text)
    return text
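

# Illustrative example of the substitutions above (the input string is made
# up; the expected output follows from the regexes, it is not captured from
# production data):
#
#     clean_text('thanks dr smith\nGreat service!')
#     # -> 'thanks Dr. smith, Great service!'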


def model_inference(text):
    """Send the cleaned review text to the NLU service and return its JSON reply."""
    # url = nlu_server_url + '/model/parse'
    url = nlu_server_url + '/predict'
    text = clean_text(text)
    payload = {'text': text}
    headers = {'content-type': 'application/json'}
    response = requests.post(url, data=json.dumps(payload), headers=headers)
    if response.status_code == 200:
        return response.json()
    # On failure the raw Response object is returned so callers can inspect it.
    return response


def is_a_name(name):
    """
    Decide whether the given string is a person name.

    :param name: a string, usually the reviewer's display name.
    :return: True if the NER service recognises it as a person name, else False.
    """
    url = ner_model_url + '/name'
    payload = {'name': name}
    headers = {'content-type': 'application/json'}
    res = requests.post(url, data=json.dumps(payload), headers=headers).json()
    return res.get('name')
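

# Illustrative sketch (assumption, not part of the original module): a typical
# use of is_a_name() is deciding whether a reply can address the reviewer by
# name. `reviewer_name` is a hypothetical parameter.
def _example_greeting(reviewer_name):
    if is_a_name(reviewer_name):
        return 'Hi %s,' % reviewer_name
    return 'Hi,'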


# This function is no longer used.
def analyze_inference(response):
    """
    The response has four properties:
    ['intent', 'entities', 'intent_ranking', 'text'].
    Keep every intent with more than 10% confidence; intents below that
    threshold are omitted.

    :param response: JSON response returned by the RASA NLU server.
    :return: dict mapping each intent name to its confidence as an integer percentage.
    """
    res_intents = response.get('intent_ranking')
    intents = {}
    for intent in res_intents:
        key = intent.get('name')
        value = intent.get('confidence')
        if value > 0.1:
            intents[key] = int(value * 100)
    return intents


def analyze_model_inference(response):
    """
    Extract intents from the custom model's response and keep those with more
    than 10% confidence, as a dict of intent name -> integer percentage.
    """
    res_intents = response.get('response').get('intents')
    sorted_dict = dict(sorted(res_intents.items(), key=operator.itemgetter(1), reverse=True))
    intents = {}
    for key, value in sorted_dict.items():
        value = float(value)
        if value > 0.1:
            intents[key] = int(value * 100)
    return intents
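

# Illustrative sketch (assumption, not part of the original module): how the
# two helpers above are typically chained. Assumes the NLU service's JSON has
# the shape analyze_model_inference() expects, e.g.
# {'response': {'intents': {'appointment': '0.62', 'staff_praise': '0.31'}}}.
def _example_intent_pipeline(review_text):
    response = model_inference(review_text)
    if isinstance(response, dict):
        return analyze_model_inference(response)
    # Non-200 calls return the raw requests.Response object; report no intents.
    return {}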