12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- import datetime
- import random
- from time import sleep
- from django.utils import timezone
- from django.db.models import Max
- from .models import YelpReview, YelpLocation
- from .scrapper import scrape_reviews
def date_string2timezone(date):
    """Convert a ``month/day/year`` string into a timezone-aware datetime.

    Args:
        date: Text such as ``'7/2/1995'``; it is split on ``'/'`` and each
            piece is converted with ``int``.

    Returns:
        The result of ``timezone.make_aware`` applied to a naive
        ``datetime.datetime`` at midnight of the parsed day.

    Raises:
        ValueError: If a piece is not an integer, the string does not have
            exactly three pieces, or the values do not form a valid date.
    """
    month, day, year = [int(piece) for piece in date.split('/')]
    naive_midnight = datetime.datetime(year, month, day)
    return timezone.make_aware(naive_midnight)
def get_max_date(yelp_location):
    """Return the largest ``date_posted`` among a location's reviews.

    Args:
        yelp_location: Object exposing a ``yelpreview_set`` manager
            (presumably a ``YelpLocation`` instance; confirm against callers).

    Returns:
        The ``date_posted__max`` aggregate over ``yelpreview_set``, or the
        timezone-aware datetime for ``'7/2/1995'`` (month/day/year) when the
        aggregate is ``None``.
    """
    aggregated = yelp_location.yelpreview_set.all().aggregate(Max('date_posted'))
    latest = aggregated['date_posted__max']
    if latest is None:
        # No maximum available: fall back to a fixed date in the past.
        return date_string2timezone('7/2/1995')
    return latest
def store_into_database(reviews, location):
    """Store scraped reviews for ``location`` as ``YelpReview`` rows.

    Args:
        reviews: Iterable of mapping-like review records. Each record is read
            with ``.get`` for the keys ``name``, ``profile``, ``rating``,
            ``date_posted`` (a ``month/day/year`` string) and ``comment``.
        location: Value stored in the ``location`` field of every review.

    A record whose ``date_posted`` is missing or empty is skipped with a
    printed message, so one incomplete record no longer aborts the rest of
    the batch. A date string that is present but malformed still raises
    ``ValueError`` from ``date_string2timezone``.
    """
    for rev in reviews:
        raw_date = rev.get('date_posted')
        if not raw_date:
            # ``.get`` yields None for an absent key, and parsing None would
            # raise AttributeError on ``.split``; skip this record instead.
            print(f'Skipping review without a date for {location}')
            continue

        # Every field is a lookup argument and no ``defaults`` are passed, so
        # a matching row has no fields changed; only unseen reviews insert.
        _, created = YelpReview.objects.update_or_create(
            reviewer_name=rev.get('name'),
            profile=rev.get('profile'),
            rating=rev.get('rating'),
            date_posted=date_string2timezone(raw_date),
            comment=rev.get('comment'),
            location=location,
        )

        if created:
            print(f'A new review object has been created for {location}!')
        else:
            print('Review already exist')
def populate_yelp_reviews():
    """Scrape one page of reviews for every ``YelpLocation`` and store them.

    For each location, ``scrape_reviews`` is called with the location's
    ``url`` and ``n_pages=1``; a truthy result is passed to
    ``store_into_database`` together with the location.
    """
    for yelp_location in YelpLocation.objects.all():
        scraped = scrape_reviews(location_url=yelp_location.url, n_pages=1)
        if scraped:
            store_into_database(scraped, yelp_location)

        # Pause for a random 30-60 seconds to make requests more human-like.
        # NOTE(review): the pause is assumed to run once per location, whether
        # or not reviews came back - confirm against the intended nesting.
        sleep(random.randint(30, 60))
|