Преглед на файлове

Create models for Yelp reviews

Mohidul Islam преди 5 години
родител
ревизия
6aebc5697d
променени са 10 файла, в които са добавени 130 реда и са изтрити 1 реда
  1. 2 1
      review_automation/settings/base.py
  2. 0 0
      yelp/__init__.py
  3. 11 0
      yelp/admin.py
  4. 5 0
      yelp/apps.py
  5. 36 0
      yelp/migrations/0001_initial.py
  6. 0 0
      yelp/migrations/__init__.py
  7. 22 0
      yelp/models.py
  8. 48 0
      yelp/scrapper.py
  9. 3 0
      yelp/tests.py
  10. 3 0
      yelp/views.py

+ 2 - 1
review_automation/settings/base.py

@@ -28,6 +28,7 @@ PROJECTS_APPS = [
     'nlu_job.apps.NluJobConfig',
     'analytics.apps.AnalyticsConfig',
     'name_extractor.apps.NameExtractorConfig',
+    'yelp.apps.YelpConfig',
 ]
 
 INSTALLED_APPS = DJANGO_DEFAULT_APPS + THIRD_PARTY_APPS + PROJECTS_APPS
@@ -65,7 +66,7 @@ TEMPLATES = [
 
 LANGUAGE_CODE = 'en-us'
 
-TIME_ZONE = 'Asia/Dhaka'
+TIME_ZONE = 'UTC'
 
 USE_I18N = True
 

+ 0 - 0
yelp/__init__.py


+ 11 - 0
yelp/admin.py

@@ -0,0 +1,11 @@
+from django.contrib import admin
+
+from .models import YelpLocation, YelpReview
+
+
+@admin.register(YelpLocation)
+class YelpLocationAdmin(admin.ModelAdmin):
+    """Admin options for YelpLocation: the change list shows two columns."""
+
+    # 'location_id' is presumably the raw key of the one-to-one `location`
+    # relation declared on YelpLocation -- confirm against yelp/models.py.
+    list_display = ('location_id', 'url')
+
+
+# YelpReview is registered with the default ModelAdmin options.
+admin.site.register(YelpReview)

+ 5 - 0
yelp/apps.py

@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+
+class YelpConfig(AppConfig):
+    """Application configuration for the ``yelp`` app."""
+
+    # Dotted path of the application package this config applies to.
+    name = 'yelp'

+ 36 - 0
yelp/migrations/0001_initial.py

@@ -0,0 +1,36 @@
+# Generated by Django 3.0.4 on 2020-06-08 08:59
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """Initial migration for the yelp app.
+
+    Creates the ``YelpLocation`` and ``YelpReview`` models. Depends on the
+    ``gauth`` app because ``YelpLocation.location`` points at
+    ``gauth.Location``.
+    """
+
+    initial = True
+
+    dependencies = [
+        ('gauth', '0012_auto_20200512_1212'),
+    ]
+
+    operations = [
+        # YelpLocation is created first because YelpReview references it.
+        migrations.CreateModel(
+            name='YelpLocation',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('url', models.URLField()),
+                ('location', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='gauth.Location')),
+            ],
+        ),
+        migrations.CreateModel(
+            name='YelpReview',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('reviewer_name', models.CharField(max_length=255)),
+                ('profile', models.URLField()),
+                ('rating', models.IntegerField()),
+                ('date_posted', models.DateField()),
+                ('comment', models.TextField()),
+                ('location', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='yelp.YelpLocation')),
+            ],
+        ),
+    ]

+ 0 - 0
yelp/migrations/__init__.py


+ 22 - 0
yelp/models.py

@@ -0,0 +1,22 @@
+from django.db import models
+from gauth.models import Location
+
+
+class YelpLocation(models.Model):
+    """Associates one ``gauth`` Location with a URL.
+
+    NOTE(review): the URL is presumably the location's Yelp business page --
+    confirm with the scraper's callers.
+    """
+
+    # At most one YelpLocation per Location; deleting the Location deletes
+    # this row as well (CASCADE).
+    location = models.OneToOneField(Location, on_delete=models.CASCADE)
+    url = models.URLField()
+
+    def __str__(self):
+        # Displayed using the related Location's `care_name` attribute.
+        return self.location.care_name
+
+
+class YelpReview(models.Model):
+    """A single review attached to a YelpLocation."""
+
+    reviewer_name = models.CharField(max_length=255)
+    # URL field; presumably the reviewer's profile page -- confirm.
+    profile = models.URLField()
+    # Integer rating; the valid range is not enforced here.
+    rating = models.IntegerField()
+    # Stored as a date only (no time component).
+    date_posted = models.DateField()
+    comment = models.TextField()
+    # Deleting the YelpLocation deletes its reviews as well (CASCADE).
+    location = models.ForeignKey(YelpLocation, on_delete=models.CASCADE)
+
+    def __str__(self):
+        # Displayed using the reviewer's name.
+        return self.reviewer_name

+ 48 - 0
yelp/scrapper.py

@@ -0,0 +1,48 @@
+import requests
+import datetime
+from django.utils import timezone
+from bs4 import BeautifulSoup as bSoup
+
+# Values for the `start` query parameter: 0, 20, 40, ..., 180 (ten entries).
+# scrape_reviews() slices this list, so at most ten pages can be requested.
+# NOTE(review): the step of 20 presumably matches the number of reviews per
+# page -- confirm.
+pages_start = [i*20 for i in range(10)]
+
+# NOTE(review): the business page is hard-coded to a single listing.
+BASE_URL = 'https://www.yelp.com/biz/signaturecare-emergency-center-montrose-houston-2'
+# Query string prefix; the start offset is appended to it.
+PARAMETER = '?sort_by=date_desc&start='
+
+
+def date_string2timezone(date):
+    """Convert a 'month/day/year' string into an aware datetime at midnight.
+
+    The three slash-separated parts are parsed as integers; a string with a
+    different number of parts, or a non-numeric part, raises ValueError.
+    The naive datetime is made aware through ``timezone.make_aware`` with
+    its default time zone.
+    """
+    parts = [int(piece) for piece in date.split('/')]
+    month, day, year = parts
+    naive_midnight = datetime.datetime(year, month, day)
+    return timezone.make_aware(naive_midnight)
+
+
+def _parse_review(review_content, location):
+    """Build one review dict from a single review ``<li>`` element.
+
+    Raises AttributeError, IndexError, TypeError or ValueError when an
+    expected child element or attribute is missing or malformed.
+    """
+    profile = review_content.find('a', class_='lemon--a__373c0__IEZFH link__373c0__1G70M link-color--inherit__373c0__3dzpk link-size--inherit__373c0__1VFlE')
+    rating_div = review_content.find('span', class_='lemon--span__373c0__3997G display--inline__373c0__3JqBP border-color--default__373c0__3-ifU')
+    date = review_content.find('span', class_='lemon--span__373c0__3997G text__373c0__2Kxyz text-color--mid__373c0__jCeOG text-align--left__373c0__2XGa-').text
+    review_text = review_content.find('span', class_='lemon--span__373c0__3997G raw__373c0__3rKqk').text
+    return {
+        'name': profile.getText(),
+        # The href is joined onto the site root to form a full URL.
+        'profile': 'https://www.yelp.com' + profile.get('href'),
+        # The rating is the first whitespace-separated token of aria-label.
+        'rating': int(rating_div.div.get('aria-label').split()[0]),
+        # Kept as the raw page text; it is not parsed into a date here.
+        'date_posted': date,
+        # Key spelling is kept as-is because callers may read it.
+        'messege': review_text,
+        'location': location,
+    }
+
+
+def scrape_reviews(location, n_pages, timeout=10):
+    """Scrape reviews from up to ``n_pages`` pages of the Yelp listing.
+
+    Args:
+        location: Value stored unchanged under the 'location' key of every
+            returned dict. NOTE(review): it does not select the page to
+            scrape; the URL always comes from BASE_URL.
+        n_pages: Number of pages to fetch. The offsets come from slicing
+            ``pages_start``, so the count is capped by that list's length.
+        timeout: Seconds to wait for each HTTP request, so that a stalled
+            connection cannot block the scrape indefinitely.
+
+    Returns:
+        A list of dicts with the keys 'name', 'profile', 'rating',
+        'date_posted', 'messege' and 'location'.
+
+    Review elements that lack the expected markup are skipped with a
+    warning instead of aborting the scrape and losing collected reviews.
+    """
+    import logging
+
+    logger = logging.getLogger(__name__)
+
+    review_collection = []
+    for start in pages_start[:n_pages]:
+        url = BASE_URL + PARAMETER + str(start)
+        response = requests.get(url, timeout=timeout).text
+        html_soup = bSoup(response, 'html.parser')
+
+        review_items = html_soup.findAll('li', class_='lemon--li__373c0__1r9wz margin-b3__373c0__q1DuY padding-b3__373c0__342DA border--bottom__373c0__3qNtD border-color--default__373c0__3-ifU')
+
+        for review_content in review_items:
+            try:
+                review_resource = _parse_review(review_content, location)
+            except (AttributeError, IndexError, TypeError, ValueError):
+                logger.warning('Skipping review with unexpected markup on %s', url)
+                continue
+            review_collection.append(review_resource)
+    return review_collection

+ 3 - 0
yelp/tests.py

@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here.

+ 3 - 0
yelp/views.py

@@ -0,0 +1,3 @@
+from django.shortcuts import render
+
+# Create your views here.