# Extract all reviews with their ratings and links
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
def add_reviews(s, soup, results):
    """Append one ``[title, link, rating, text]`` row per review in ``soup``.

    Args:
        s: Session-like object; its ``post`` method is called once per review.
        soup: Parsed listing page; every ``.review-article`` element in it is
            treated as one review card.
        results: List that receives the rows. It is mutated in place.

    Returns:
        None. Rows are appended to ``results``.

    Notes:
        The POST payload is built from the module-level ``data`` template,
        which must exist by the time this function is called. The template
        itself is not modified.
    """
    for article in soup.select('.review-article'):
        info = article.select_one('a')
        id_holder = article.select_one('[reviewid]')
        if info is None or id_holder is None:
            # select_one returns None when nothing matches; skip cards that
            # lack a link or a review id instead of aborting the whole scrape.
            continue

        # Build a per-request payload from the shared template rather than
        # writing the id into the module-level dict.
        payload = dict(data, reviewid=id_holder['reviewid'])

        # The rating is the number of '.rated-star' elements in the card.
        rating = len(article.select('.rated-star'))

        # Presumably this endpoint returns the full review body for the given
        # reviewid; all <p> texts of the response are joined with spaces.
        r = s.post('https://www.mouthshut.com/review/CorporateResponse.ashx', payload)
        detail = bs(r.content, 'lxml')
        text = ' '.join(p.text for p in detail.select('p'))

        results.append([info.text, info['href'], rating, text])
# URL template for listing pages 2..N; the first page uses the URL without
# the "-page-{}" suffix (see the initial GET below).
url = 'https://www.mouthshut.com/product-reviews/ICICI-Lombard-Auto-Insurance-reviews-925641018-page-{}'

# Form payload template read by add_reviews for each per-review POST;
# 'reviewid' is supplied per review. 'catid' is the same id that appears in
# the product URL.
data = {'type': 'review', 'reviewid': '', 'catid': '925641018', 'corp': 'false', 'catname': ''}

# Accumulates one [title, link, rating, review] row per scraped review.
results = []

with requests.Session() as s:
    r = s.get('https://www.mouthshut.com/product-reviews/ICICI-Lombard-Auto-Insurance-reviews-925641018')
    soup = bs(r.content, 'lxml')

    # The text of the last pager link is taken as the total page count.
    # When the page has no pager links at all, fall back to a single page
    # instead of failing with IndexError on the empty selection.
    page_links = soup.select('#spnPaging .btn-link')
    pages = int(page_links[-1].text) if page_links else 1

    add_reviews(s, soup, results)

    # range(2, pages + 1) is empty when pages <= 1, so no extra guard is needed.
    for page in range(2, pages + 1):
        r = s.get(url.format(page))
        soup = bs(r.content, 'lxml')
        add_reviews(s, soup, results)

df = pd.DataFrame(results, columns=['Title', 'Link', 'Rating', 'Review'])
print(df)