我正在尝试进行网上抓取,并且目前停留在如何继续执行代码上。我正在尝试创建一个刮擦前80个Yelp的代码!评论。由于每页只有20条评论,因此我也想弄清楚如何创建一个循环来将网页更改为下20条评论。
from bs4 import BeautifulSoup
import requests
import time
all_reviews = ''
def get_description(pullman):
url = f'https://www.yelp.com/biz/pullman-bar-and-diner-iowa-city'
# get webpage data from url
response = requests.get(url)
#sleep for 2 seconds
time.sleep(2)
# get html document from web page data
html_doc = response.text
# parser
soup = BeautifulSoup(html_doc,"lxml")
page_title = soup.title.text
#get a tag content based on class
p_tag = soup.find_all('p',class_='lemon--p__373c0__3Qnnj text__373c0__2pB8f comment__373c0__3EKjH text-color--normal__373c0__K_MKN text-align--left__373c0__2pnx_')[0]
#print the text within the tag
return p_tag.text