Add fast-selenium API to reply to Facebook reviews and scrape Yelp reviews

Mohidul Islam 4 years ago
parent
commit
8f0409d01a

+ 11 - 35
facebook_app/review_utils.py

@@ -1,10 +1,11 @@
 import requests
+import json
 from .models import FacebookPage, FacebookReview, FacebookReviewReply
 
 from django.conf import settings
 
 # browser instance
-browser = getattr(settings, 'BROWSER', None)
+browser_uri = getattr(settings, 'BROWSER_URI', None)
 
 BASE_URL = 'https://graph.facebook.com'
 
@@ -97,37 +98,12 @@ def populate_facebook_reviews():
         store_reviews_into_db(page_id=page_id.get('id'), n_reviews=15)
 
 
-def reply2facebook_review(review_id, reply, like=False, love=False):
-    # switch to mobile view which not support any JavaScript
-    base_url = 'https://mbasic.facebook.com/'
-    url = base_url + str(review_id)
-    browser.get(url)
-
-    # like the review if needed
-    if like:
-        like_btn = browser.find_element_by_xpath(
-            '/html/body/div/div/div[2]/div/div[1]/div[2]/div/div[1]/table/tbody/tr/td[1]/a')
-        assert like_btn.text == 'Like'
-        like_btn.click()
-
-    # love the review if needed
-    if love:
-        react_btn = browser.find_element_by_xpath(
-            '/html/body/div/div/div[2]/div/div[1]/div[2]/div/div[1]/table/tbody/tr/td[2]/a')
-        assert react_btn.text == 'React'
-        react_btn.click()
-        love_btn = browser.find_element_by_xpath(
-            '/html/body/div/div/div[2]/div/table/tbody/tr/td/ul/li[2]/table/tbody/tr/td/a/div/table/tbody/tr/td[2]')
-        assert love_btn.text == 'Love'
-        love_btn.click()
-
-    # reply the review
-    reply_field = browser.find_element_by_id('composerInput')
-    assert reply_field.get_attribute('name') == 'comment_text'
-    reply_field.send_keys(reply)
-    ins = browser.find_elements_by_tag_name('input')
-    for x in ins:
-        if 'omment' in x.get_attribute('value'):
-            x.click()
-            break
-
+def reply2facebook_review(review_id, reply):
+    url = browser_uri + f'/facebook/reply'
+    payload = {
+        'id': review_id,
+        'reply': reply
+    }
+    headers = {'content-type': 'application/json'}
+    response = requests.post(url, data=json.dumps(payload), headers=headers)
+    return response.status_code
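
For context, the rewritten helper delegates all browser automation to a separate fast-selenium service reachable at BROWSER_URI and simply returns the HTTP status code. A minimal usage sketch, with a made-up review id and the status semantics taken from the view change below:

# Hypothetical call; the review id is illustrative.
from facebook_app.review_utils import reply2facebook_review

status = reply2facebook_review(
    review_id='1234567890_111',
    reply='Thanks for the kind words!'
)
if status == 201:
    print('Reply posted by the browser service')
elif status == 404:
    print('Review not found; it may have been deleted')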

+ 18 - 12
facebook_app/views.py

@@ -23,20 +23,26 @@ class FacebookReviews(LoginRequiredMixin, View):
             replied_text = form.cleaned_data.get('reply')
         review_id = self.request.POST['review_id']
         print('review-id', review_id)
-        # TODO: Reply this review using selenium...
-        reply2facebook_review(
+        status_code = reply2facebook_review(
             review_id=review_id,
-            reply=replied_text,
-            like=True
+            reply=replied_text
         )
-
-        FacebookReviewReply.objects.create(
-            review_id=review_id,
-            create_time=timezone.now(),
-            replied_text=replied_text
-        )
-        messages.success(request, f'Your reply has been posted!')
-
+        if status_code == 201:
+            FacebookReviewReply.objects.create(
+                review_id=review_id,
+                create_time=timezone.now(),
+                replied_text=replied_text
+            )
+            messages.success(request, f'Your reply has been posted!')
+        elif status_code == 404:
+            FacebookReviewReply.objects.create(
+                review_id=review_id,
+                create_time=timezone.now(),
+                replied_text=replied_text
+            )
+            messages.warning(request, 'The user might have deleted this review.')
+        else:
+            messages.error(request, 'Something went wrong, please notify the developers.')
         return redirect('facebook-reviews')
 
     def get(self, request, *args, **kwargs):

+ 3 - 3
review_automation/settings/config.py

@@ -16,13 +16,13 @@ TOKEN_URI = "https://oauth2.googleapis.com/token"
 
 # Configure Email Server
 EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend'
-
 EMAIL_USE_TLS = True
-
+EMAIL_HOST_USER = 'webmaster@ercare24.com'
+EMAIL_HOST_PASSWORD = 'Webdev#7182'
 EMAIL_HOST = 'smtp.gmail.com'
-
 EMAIL_PORT = 587
 
+
 ADMIN_MAINTAINER_EMAILS = [
     'hannan@ercare24.com'
     ]
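
The hunk above commits the SMTP user and password in plain text. Not part of this change, but a common alternative is to pull them from environment variables; a minimal sketch:

import os

# Same settings, with the secrets read from the environment instead of the source tree.
EMAIL_HOST_USER = os.environ.get('EMAIL_HOST_USER', '')
EMAIL_HOST_PASSWORD = os.environ.get('EMAIL_HOST_PASSWORD', '')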

+ 1 - 1
yelp/analytics.py

@@ -17,7 +17,7 @@ def send_email_bad_reviews():
     locations = Location.objects.all()
     for location in locations:
         to = settings.ADMIN_MAINTAINER_EMAILS
-        date = timezone.now() - timezone.timedelta(days=1)
+        date = timezone.now() - timezone.timedelta(hours=12)
         reviews = YelpReview.objects.filter(
             location__location_id=location.location_id,
             date_posted__gte=date,

+ 2 - 2
yelp/background_job.py

@@ -1,7 +1,7 @@
-from .store_reviews import populate_reviews
+from .utils import populate_yelp_reviews
 from .analytics import send_email_bad_reviews
 
 
 def scheduled_task():
-    populate_reviews()
+    populate_yelp_reviews()
     send_email_bad_reviews()

+ 17 - 14
yelp/scrapper.py

@@ -2,13 +2,14 @@ import requests
 import random
 from time import sleep
 from bs4 import BeautifulSoup as bSoup
-
-from .utils import date_string2timezone
+from django.conf import settings
 
 pages_start = [i*20 for i in range(10)]
 
 PARAMETER = '?sort_by=date_desc&start='
 
+BROWSER = getattr(settings, 'BROWSER_URI')
+
 
 headers = {
     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:50.0) Gecko/20100101 Firefox/50.0',
@@ -34,7 +35,6 @@ def scrape_reviews(location_url, max_date, n_pages):
 
         review_items = html_soup.findAll('li', class_='lemon--li__373c0__1r9wz margin-b3__373c0__q1DuY padding-b3__373c0__342DA border--bottom__373c0__3qNtD border-color--default__373c0__3-ifU')
 
-
         # A single review content
         if not review_items:
             return None
@@ -45,16 +45,19 @@ def scrape_reviews(location_url, max_date, n_pages):
             rating_div = review_content.find('span', class_='lemon--span__373c0__3997G display--inline__373c0__3JqBP border-color--default__373c0__3-ifU')
             rating = int(rating_div.div.get('aria-label').split()[0])
             date_posted = review_content.find('span', class_='lemon--span__373c0__3997G text__373c0__2Kxyz text-color--mid__373c0__jCeOG text-align--left__373c0__2XGa-').text
-            date = date_string2timezone(date_posted)
             review_text = review_content.find('span', class_='lemon--span__373c0__3997G raw__373c0__3rcx7').text
-
-            if date > max_date:
-                review = {
-                    'name': name,
-                    'profile': profile_link,
-                    'rating': rating,
-                    'date_posted': date,
-                    'comment': review_text,
-                }
-                yelp_reviews.append(review)
+            review = {
+                'name': name,
+                'profile': profile_link,
+                'rating': rating,
+                'date_posted': date_posted,
+                'comment': review_text,
+            }
+            yelp_reviews.append(review)
     return yelp_reviews
+
+
+def scrape_reviews_using_browser(location_url):
+    url = BROWSER + f'/yelp/reviews?url={location_url}'
+    response = requests.get(url).json()
+    return response.get('reviews')
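
The new scrape_reviews_using_browser helper assumes the fast-selenium service exposes a /yelp/reviews endpoint that returns JSON with a 'reviews' list whose items carry the same keys the old scraper produced. A sketch of the expected exchange; the business URL and field values are illustrative:

from yelp.scrapper import scrape_reviews_using_browser

# Hypothetical response body from BROWSER + '/yelp/reviews?url=...':
# {"reviews": [{"name": "Jane D.", "profile": "https://www.yelp.com/user_details?userid=...",
#               "rating": 5, "date_posted": "1/15/2020", "comment": "Great service!"}]}
reviews = scrape_reviews_using_browser('https://www.yelp.com/biz/some-business')
for review in reviews or []:
    print(review['rating'], review['comment'][:60])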

+ 14 - 1
yelp/utils.py

@@ -1,8 +1,11 @@
 import datetime
+import random
+from time import sleep
 from django.utils import timezone
 from django.db.models import Max
 
 from .models import YelpReview, YelpLocation
+from .scrapper import scrape_reviews_using_browser
 
 
 def date_string2timezone(date):
@@ -23,7 +26,8 @@ def store_into_database(reviews, location):
         name = rev.get('name')
         profile = rev.get('profile')
         rating = rev.get('rating')
-        date_posted = rev.get('date_posted')
+        date = rev.get('date_posted')
+        date_posted = date_string2timezone(date)
         comment = rev.get('comment')
 
         # store into database
@@ -37,3 +41,12 @@ def store_into_database(reviews, location):
         )
         if created:
             print(f'A new review object has been created for {location}!')
+
+
+def populate_yelp_reviews():
+    yelp_locations = YelpLocation.objects.all()
+    for yl in yelp_locations:
+        reviews = scrape_reviews_using_browser(location_url=yl.url)
+        store_into_database(reviews, yl)
+        # pause for 30-60 sec to make requests more human-like.
+        sleep(random.randint(30, 60))
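
Taken together with the background_job.py change, the Yelp side now runs end to end as: fetch each location's reviews through the browser service, store the new ones, then mail admins about bad reviews from the last 12 hours. A condensed sketch of exercising it by hand from a Django shell:

# e.g. inside `python manage.py shell`
from yelp.utils import populate_yelp_reviews
from yelp.analytics import send_email_bad_reviews

populate_yelp_reviews()      # scrape each YelpLocation via the browser service and store new reviews
send_email_bad_reviews()     # email ADMIN_MAINTAINER_EMAILS about bad reviews from the last 12 hours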