|
@@ -27,6 +27,7 @@ def scrape_reviews(location_url, n_pages):
|
|
|
|
|
|
for start in start_params:
|
|
for start in start_params:
|
|
url = location_url + PARAMETER + str(start)
|
|
url = location_url + PARAMETER + str(start)
|
|
|
|
+ print(f"Collecting reviews from {location_url.split('/')[-1]}: {start}")
|
|
browser = mechanicalsoup.StatefulBrowser()
|
|
browser = mechanicalsoup.StatefulBrowser()
|
|
browser.open(url)
|
|
browser.open(url)
|
|
|
|
|
|
@@ -34,8 +35,8 @@ def scrape_reviews(location_url, n_pages):
|
|
sleep(random.randint(30, 60))
|
|
sleep(random.randint(30, 60))
|
|
html_soup = browser.page
|
|
html_soup = browser.page
|
|
|
|
|
|
- review_items = html_soup.findAll('li', class_='margin-b5__373c0__2ErL8')
|
|
|
|
-
|
|
|
|
|
|
+ review_items = html_soup.findAll('li', class_='margin-b5__373c0__3ho0z')
|
|
|
|
+ print(f'Got {len(review_items)} reviews in this Loc.')
|
|
# A single review content
|
|
# A single review content
|
|
if not review_items:
|
|
if not review_items:
|
|
return None
|
|
return None
|
|
@@ -45,10 +46,10 @@ def scrape_reviews(location_url, n_pages):
|
|
continue
|
|
continue
|
|
name = user_detail_div.a.getText()
|
|
name = user_detail_div.a.getText()
|
|
profile_link = user_detail_div.a.attrs.get('href')
|
|
profile_link = user_detail_div.a.attrs.get('href')
|
|
- rating_div = review.find('div', class_='i-stars__373c0__1T6rz')
|
|
|
|
|
|
+ rating_div = review.find('div', class_='i-stars__373c0___sZu0')
|
|
rating = int(rating_div.attrs.get('aria-label').split()[0])
|
|
rating = int(rating_div.attrs.get('aria-label').split()[0])
|
|
date_posted = review.find('span', class_='css-e81eai').getText()
|
|
date_posted = review.find('span', class_='css-e81eai').getText()
|
|
- comment_p = review.find('p', class_='comment__373c0__1M-px css-n6i4z7')
|
|
|
|
|
|
+ comment_p = review.find('p', class_='comment__373c0__Nsutg css-n6i4z7')
|
|
review_text = comment_p.span.getText()
|
|
review_text = comment_p.span.getText()
|
|
review_dict = {
|
|
review_dict = {
|
|
'name': name,
|
|
'name': name,
|