소스 검색

Change yelp review class

Mohidul Islam 3 년 전
부모
커밋
07fb27a1c8
2개의 변경된 파일, 6개의 추가 그리고 5개의 삭제
  1. 5 4
      yelp/scrapper.py
  2. 1 1
      yelp/utils.py

+ 5 - 4
yelp/scrapper.py

@@ -27,6 +27,7 @@ def scrape_reviews(location_url, n_pages):
 
     for start in start_params:
         url = location_url + PARAMETER + str(start)
+        print(f"Collecting reviews from {location_url.split('/')[-1]}: {start}")
         browser = mechanicalsoup.StatefulBrowser()
         browser.open(url)
 
@@ -34,8 +35,8 @@ def scrape_reviews(location_url, n_pages):
         sleep(random.randint(30, 60))
         html_soup = browser.page
 
-        review_items = html_soup.findAll('li', class_='margin-b5__373c0__2ErL8')
-
+        review_items = html_soup.findAll('li', class_='margin-b5__373c0__3ho0z')
+        print(f'Got {len(review_items)} reviews in this Loc.')
         # A single review content
         if not review_items:
             return None
@@ -45,10 +46,10 @@ def scrape_reviews(location_url, n_pages):
                 continue
             name = user_detail_div.a.getText()
             profile_link = user_detail_div.a.attrs.get('href')
-            rating_div = review.find('div', class_='i-stars__373c0__1T6rz')
+            rating_div = review.find('div', class_='i-stars__373c0___sZu0')
             rating = int(rating_div.attrs.get('aria-label').split()[0])
             date_posted = review.find('span', class_='css-e81eai').getText()
-            comment_p = review.find('p', class_='comment__373c0__1M-px css-n6i4z7')
+            comment_p = review.find('p', class_='comment__373c0__Nsutg css-n6i4z7')
             review_text = comment_p.span.getText()
             review_dict = {
                 'name': name,

+ 1 - 1
yelp/utils.py

@@ -52,4 +52,4 @@ def populate_yelp_reviews():
         if reviews:
             store_into_database(reviews, yl)
         # pause for 30-60 sec to make request more human-like.
-        sleep(random.randint(30, 60))
+        sleep(random.randint(15, 30))