先上代码:
import requests
from bs4 import BeautifulSoup
import time
class top250books():
def html(self, href):
    """Visit every page of the paginated listing rooted at ``href``.

    One URL is built per page by appending ``?start=<offset>`` to ``href``,
    and each URL is handed to ``self.parse`` in ascending offset order.

    Args:
        href: Base URL of the listing; the ``?start=`` query is appended
            to it verbatim.
    """
    page_size = 25
    # Offset of the final page (the class name suggests a 250-item list,
    # i.e. ten pages of 25 starting at 0, 25, ..., 225).
    last_offset = 225
    # range() excludes its stop value, so the stop is pushed one page past
    # last_offset; otherwise the page starting at 225 is silently skipped.
    for offset in range(0, last_offset + page_size, page_size):
        page_url = href + '?start=' + str(offset)
        self.parse(page_url)
def parse(self, url):
html = self.request(url)
all_tables = BeautifulSoup(html.text, 'lxml').find('div', class_='indent').find_all('table')
for table in all_tables:
time.sleep(0.5)
title = table.find('div', class_='pl2').find('a').get_text()
info = table.find('p', class_='pl').get_text()
rating_nums = table.find('span', class_='rating_nums').get_text()
rating_people = table.find('span', class_='pl