import datetime
import random
from time import sleep

from django.db.models import Max
from django.utils import timezone

from .models import YelpReview, YelpLocation
from .scrapper import scrape_reviews


def date_string2timezone(date):
    """Convert a 'month/day/year' string into a timezone-aware datetime.

    Args:
        date: String of three '/'-separated integers in month, day, year
            order (e.g. '7/2/1995').

    Returns:
        A datetime at midnight of that day, made aware via
        ``timezone.make_aware`` (no explicit timezone is passed).

    Raises:
        AttributeError: If ``date`` has no ``split`` method (e.g. ``None``).
        ValueError: If the string does not hold exactly three integer parts
            or they do not form a valid calendar date.
    """
    month, day, year = map(int, date.split('/'))
    naive = datetime.datetime(year=year, month=month, day=day)
    return timezone.make_aware(naive)


def get_max_date(yelp_location):
    """Return the latest ``date_posted`` among the location's reviews.

    Args:
        yelp_location: Object exposing a ``yelpreview_set`` related manager.

    Returns:
        The maximum ``date_posted`` value, or the aware datetime for
        '7/2/1995' when the aggregate yields ``None`` (no reviews stored).
    """
    latest = yelp_location.yelpreview_set.aggregate(
        Max('date_posted')
    )['date_posted__max']
    if latest is None:
        # Fallback date used when there is nothing to aggregate.
        return date_string2timezone('7/2/1995')
    return latest


def store_into_database(reviews, location):
    """Persist scraped reviews for ``location`` as ``YelpReview`` rows.

    Each review is a mapping read with the keys 'name', 'profile', 'rating',
    'date_posted' and 'comment'. A review whose 'date_posted' is missing or
    cannot be parsed is skipped (with a printed message) instead of aborting
    the remaining reviews.

    Args:
        reviews: Iterable of review mappings.
        location: Value stored in the ``location`` field of each review.
    """
    for rev in reviews:
        raw_date = rev.get('date_posted')
        try:
            date_posted = date_string2timezone(raw_date)
        except (AttributeError, ValueError):
            # AttributeError: key missing (None); ValueError: malformed date.
            # One bad record must not prevent the rest from being stored.
            print(f'Skipping review with unparseable date: {raw_date!r}')
            continue

        # Every field is passed as a lookup and no ``defaults`` are given,
        # so a row matching all fields is reused and otherwise a new row is
        # created.
        _, created = YelpReview.objects.update_or_create(
            reviewer_name=rev.get('name'),
            profile=rev.get('profile'),
            rating=rev.get('rating'),
            date_posted=date_posted,
            comment=rev.get('comment'),
            location=location,
        )

        if created:
            print(f'A new review object has been created for {location}!')
        else:
            print('Review already exist')


def populate_yelp_reviews():
    """Scrape one page of reviews for every ``YelpLocation`` and store them.

    After each location (including the last one) the function sleeps for a
    random 30-60 seconds.
    """
    for yl in YelpLocation.objects.all():
        reviews = scrape_reviews(location_url=yl.url, n_pages=1)
        if reviews:
            store_into_database(reviews, yl)

        # pause for 30-60 sec to make request more human-like.
        sleep(random.randint(30, 60))