Browse Source

filter comment before storing

Gogs 4 years ago
parent
commit
57b948649d

+ 1 - 1
analytics/background_job.py

@@ -26,7 +26,7 @@ def send_email(subject, message_body, to_list, sender=SENDER, cc=CC):
 
 def send_email_weekly_summary():
     locations = Location.objects.all()
-    # locations = Location.objects.filter(location_id='12541597562633926366')
+   # locations = Location.objects.filter(location_id='12541597562633926366')
     for location in locations:
         to_list = list(LocationManager.objects.filter(location_id=location.location_id).values_list('email', flat=True))
         subject = f"Weekly report for {location.care_name}."

+ 1 - 0
dashboard/templates/base.html

@@ -31,6 +31,7 @@
             <div class="navbar-nav">
                 <a class="nav-item nav-link" href="{% url 'analytics' %}">Analytics</a>
                 <a class="nav-item nav-link" href="{% url 'leader-board' 12345 %}">Leaderboard</a>
+                <a class="nav-item nav-link" href="{% url 'logout' %}">Logout</a>
             </div>
           </div>
           {% endif %}

+ 19 - 1
facebook_app/review_utils.py

@@ -1,5 +1,7 @@
+import re
 import requests
 import json
+from functools import reduce
 from .models import FacebookPage, FacebookReview, FacebookReviewReply
 
 from django.conf import settings
@@ -12,6 +14,19 @@ BASE_URL = 'https://graph.facebook.com'
 
 RECOMMENDATION_STR2NUM = {'negative': False, 'positive': True}
 
+def remove_emoji(string):
+    """Return ``string`` with every run of characters matching the ranges below removed."""
+    emoji_pattern = re.compile("["
+                           u"\U0001F600-\U0001F64F"  # emoticons
+                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
+                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+                           u"\U00002702-\U000027B0"  # inside the Dingbats block
+                           u"\U000024C2-\U0001F251"  # NOTE(review): spans U+24C2..U+1F251, which also covers CJK, kana and Hangul text - confirm this breadth is intended
+                           "]+", flags=re.UNICODE)
+    # Matched runs are replaced with the empty string; all other characters are kept as-is.
+    return emoji_pattern.sub(r'', string)
+
+def filter_4bytechar(text):
+    """Return ``text`` without the characters whose UTF-8 encoding is 4 bytes long.
+
+    Characters that encode to 1-3 bytes are kept in their original order.
+    Returns an empty string when ``text`` is empty or when every character is
+    dropped (the earlier ``reduce``-based form raised ``TypeError`` in that case).
+    """
+    # str.join accepts an empty iterable, unlike reduce() without an initial value.
+    return ''.join(ch for ch in text if len(ch.encode('utf8')) < 4)
 
 def get_page_reviews(page_id, n_reviews):
     token = FacebookPage.objects.get(id=page_id).token
@@ -66,6 +81,9 @@ def store_reviews_into_db(page_id, n_reviews=25):
         '''
         review_id = graph_story.get('id')
         message = graph_story.get('message')
+        message = ' '.join(message.split())
+        message = remove_emoji(message)
+        message = filter_4bytechar(message)
         start_time = graph_story.get('start_time')
         recommendation_type = RECOMMENDATION_STR2NUM[graph_story.get('data')['recommendation_type']]
 
@@ -94,7 +112,7 @@ def store_reviews_into_db(page_id, n_reviews=25):
 def populate_facebook_reviews():
     page_ids = FacebookPage.objects.values('id')
     for page_id in page_ids:
-        store_reviews_into_db(page_id=page_id.get('id'), n_reviews=20)
+        store_reviews_into_db(page_id=page_id.get('id'), n_reviews=10)
 
 
 def reply2facebook_review(review_id, reply):

+ 0 - 9
nohup.out

@@ -1,9 +0,0 @@
-Watching for file changes with StatReloader
-[21/Jun/2020 14:28:27] "GET / HTTP/1.1" 200 25623
-[21/Jun/2020 14:28:30] "GET /dashboard/ HTTP/1.1" 200 25623
-[21/Jun/2020 15:19:57] "GET / HTTP/1.1" 200 25623
-[21/Jun/2020 15:20:06] "GET /?page=2 HTTP/1.1" 200 13500
-[21/Jun/2020 15:20:09] "GET /dashboard/ HTTP/1.1" 200 25623
-[21/Jun/2020 16:10:30] "GET / HTTP/1.1" 200 25623
-[21/Jun/2020 16:10:36] "GET /dashboard/ HTTP/1.1" 200 25623
-[21/Jun/2020 16:41:10] "GET / HTTP/1.1" 200 25623

+ 0 - 1
requirements.txt

@@ -12,7 +12,6 @@ django-crispy-forms==1.9.0
 django-crontab==0.7.1
 django-tempus-dominus==5.1.2.13
 djangorestframework==3.11.0
-en-core-web-sm==2.3.1
 et-xmlfile==1.0.1
 facebook-sdk==3.1.0
 google-auth==1.11.2

+ 26 - 1
review/review_utils.py

@@ -1,6 +1,7 @@
 import re
 import time
 import json
+from functools import reduce
 from requests import get, put, post
 from gauth.auth_utils import get_gmb_id, get_auth_header
 from .models import Review, Reply
@@ -13,6 +14,22 @@ STAR_REVIEW_NUM = {'STAR_RATING_UNSPECIFIED': 0, 'ONE': 1, 'TWO': 2, 'THREE': 3,
 BASE_URL = f'https://mybusiness.googleapis.com/v4/'
 
 
+def remove_emoji(string):
+    """Return ``string`` with every run of characters matching the ranges below removed."""
+    emoji_pattern = re.compile("["
+                           u"\U0001F600-\U0001F64F"  # emoticons
+                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
+                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+                           u"\U00002702-\U000027B0"  # inside the Dingbats block
+                           u"\U000024C2-\U0001F251"  # NOTE(review): spans U+24C2..U+1F251, which also covers CJK, kana and Hangul text - confirm this breadth is intended
+                           "]+", flags=re.UNICODE)
+    # Matched runs are replaced with the empty string; all other characters are kept as-is.
+    return emoji_pattern.sub(r'', string)
+
+
+def filter_4bytechar(text):
+    """Return ``text`` without the characters whose UTF-8 encoding is 4 bytes long.
+
+    Characters that encode to 1-3 bytes are kept in their original order.
+    Returns an empty string when ``text`` is empty or when every character is
+    dropped (the earlier ``reduce``-based form raised ``TypeError`` in that case).
+    """
+    # str.join accepts an empty iterable, unlike reduce() without an initial value.
+    return ''.join(ch for ch in text if len(ch.encode('utf8')) < 4)
+
+
 def clean_comment(text):
     rules = [
         {r'[^\x00-\x7F]+': ''},
@@ -63,7 +80,12 @@ def insert_review_into_database(reviews, loc_id):
             review = Review.objects.get(pk=review_id)
         except Review.DoesNotExist:
             review = Review(review_id=review_id)
-        review.comment = rev.get('comment')
+        comment = rev.get('comment')
+        if comment:
+            comment = clean_comment(comment)
+            comment = remove_emoji(comment)
+            comment = filter_4bytechar(comment)
+        review.comment = comment
         review.create_time = rev.get('createTime')
         review.update_time = rev.get('updateTime')
         review.star_rating = STAR_REVIEW_NUM[rev.get('starRating')]
@@ -138,6 +160,9 @@ def store_batch_of_reviews(reviews):
             review = Review(review_id=review_id)
         comment = rev.get('comment')
         if comment:
+            comment = clean_comment(comment)
+            comment = remove_emoji(comment)
+            comment = filter_4bytechar(comment)
             review.comment = clean_comment(comment)
         review.create_time = rev.get('createTime')
         review.update_time = rev.get('updateTime')

File diff suppressed because it is too large
+ 3 - 3
review_automation.sql


+ 1 - 1
review_automation/settings/dev.py

@@ -22,7 +22,7 @@ DATABASES = {
 
 # Host URLS
 
-HOST_URI = "http://127.0.0.1:8000"
+HOST_URI = "http://localhost:8000"
 NLU_SERVER_URI = 'http://localhost:1996'
 NER_SERVER_URI = 'http://localhost:2020'
 BROWSER_URI = 'http://localhost:2021'

+ 2 - 2
review_automation/settings/facebook_conf.py

@@ -1,3 +1,3 @@
-SHORT_LIVED_USER_TOKEN = 'EAAmiE8MkZARMBAAfLz2qjjSwh18mtVLYYDFoaiERBcETHE3V5Mv3YyYBxlTtkUdfnMe6SZBKLklh5VeAx4LBfpG7aP6iVfZB5Kmsf6UESLVXi8oXxhNuDuDqZCDZBZBvjB78je7V7QzMeMQoSgcBBDCBlbtO2ZA2obhfEjxr8MxZBAI08mucAlouNxFrw3PDlO8ZAZBgjCy920ZBDXA0qWok0lG'
+SHORT_LIVED_USER_TOKEN = 'EAAmiE8MkZARMBAH8trWUcvy6VIhkl08CLUUfDS81tk0Ac7IZCzdTuPvZAUu68DjhRD4F6kSAojlBF1L6KREaEcN1msqFyYYIq6jtR3Q03DYhXvmqGZBQDX7XKwwuNrmDBYqaCkBZBr2SjD8oRCkyF7ULstemJUlmOgDVtsVgOZB37mZBWS5RwIpuUGH2eoTYk7ziITd06k7APb0TQPMxhSOnMbXPyFy7UyiizAOMZCR8eNNs1MkwSBZC9ZBQ5hr1Ow94AZD'
 APP_ID = '2711480552416531'
-APP_SECRET = 'c53b39227e3d3b5ffbff12aa8d6fc757'
+APP_SECRET = 'c53b39227e3d3b5ffbff12aa8d6fc757'

+ 2 - 2
review_automation/settings/prod.py

@@ -3,7 +3,7 @@ from .base import *
 # SECURITY WARNING: don't run with debug turned on in production!
 DEBUG = False
 
-ALLOWED_HOSTS = ['10.0.0.36', 'bytetrek.com', 'localhost']
+ALLOWED_HOSTS = ['178.128.152.51', '10.0.0.36', 'bytetrek.com', 'localhost']
 
 WSGI_APPLICATION = 'review_automation.wsgi.prod.application'
 
@@ -39,7 +39,7 @@ AUTH_PASSWORD_VALIDATORS = [
 
 # Host URLS
 
-HOST_URI = "http://10.0.0.36:5005"
+HOST_URI = "http://178.128.152.51:8000"
 NLU_SERVER_URI = 'http://localhost:1996'
 NER_SERVER_URI = 'http://localhost:2020'
 BROWSER_URI = 'http://localhost:2021'

File diff suppressed because it is too large
+ 0 - 15
review_data_set.csv


+ 0 - 21
signature_care_review_url.json

@@ -1,21 +0,0 @@
-{
-  "paris": "https://business.google.com/reviews/l/07304194084987550691",
-  "killeen": "https://business.google.com/reviews/l/06337807787071309349",
-  "cypress": "https://business.google.com/reviews/l/01483383084396759199",
-  "texarkana": "https://business.google.com/reviews/l/02576965498507879079",
-  "odessa": "https://business.google.com/reviews/l/13372632116293573150",
-  "katy freeway": "https://business.google.com/reviews/l/05391939194652210827",
-  "college station": "https://business.google.com/reviews/l/17859229546895147986",
-  "austin": "https://business.google.com/reviews/l/11984394797109150957",
-  "stafford": "https://business.google.com/reviews/l/08172787910197778647",
-  "lewisville": "https://business.google.com/reviews/l/11975419119926401473",
-  "memorial drive": "https://business.google.com/reviews/l/16345542658869515062",
-  "montrose": "https://business.google.com/reviews/l/02110922445521262937",
-  "westchase": "https://business.google.com/reviews/l/12648617844590408918",
-  "memorial city": "https://business.google.com/reviews/l/03060473272549250064",
-  "mission bend": "https://business.google.com/reviews/l/07409147463931793039",
-  "heights": "https://business.google.com/reviews/l/15919701512914286041",
-  "copperfield": "https://business.google.com/reviews/l/01724936860578366174",
-  "bellaire": "https://business.google.com/reviews/l/15616344870880778263",
-  "midland": "https://business.google.com/reviews/l/12517976417346206005"
-}

Some files were not shown because too many files changed in this diff