utils.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. import datetime
  2. import random
  3. from time import sleep
  4. from django.utils import timezone
  5. from django.db.models import Max
  6. from .models import YelpReview, YelpLocation
  7. from .scrapper import scrape_reviews_using_browser
  8. def date_string2timezone(date):
  9. month, day, year = map(int, date.split('/'))
  10. date = datetime.datetime(day=day, month=month, year=year)
  11. time_zone = timezone.make_aware(date)
  12. return time_zone
  13. def get_max_date(yelp_location):
  14. max_date = yelp_location.yelpreview_set.all()\
  15. .aggregate(Max('date_posted'))['date_posted__max']
  16. return max_date if max_date is not None else date_string2timezone('7/2/1995')
  17. def store_into_database(reviews, location):
  18. for rev in reviews:
  19. name = rev.get('name')
  20. profile = rev.get('profile')
  21. rating = rev.get('rating')
  22. date = rev.get('date_posted')
  23. date_posted = date_string2timezone(date)
  24. comment = rev.get('comment')
  25. # store into database
  26. obj, created = YelpReview.objects.update_or_create(
  27. reviewer_name=name,
  28. profile=profile,
  29. rating=rating,
  30. date_posted=date_posted,
  31. comment=comment,
  32. location=location
  33. )
  34. if created:
  35. print(f'A new review object has been created for {location}!')
  36. def populate_yelp_reviews():
  37. yelp_locations = YelpLocation.objects.all()
  38. for yl in yelp_locations:
  39. reviews = scrape_reviews_using_browser(location_url=yl.url)
  40. store_into_database(reviews, yl)
  41. # pause for 30-60 sec to make request more human-like.
  42. sleep(random.randint(30, 60))