from bs4 import BeautifulSoup
import requests
# str.format combined with range() builds the URL of every listing page:
# the "start" offset takes the values 0, 20, 40, 60 and 80.
urls = [
    'https://read.douban.com/ebooks/category/new/?cat=book&sort=new&start={}'.format(i)
    for i in range(0, 100, 20)
]

# One <li> per book in the listing; every field is looked up inside its own
# <li> so that the title, authors and description always belong together.
_ITEM_SELECTOR = 'body > div > div.main > article > section > div.bd > ul > li'
_TITLE_SELECTOR = 'div.info > div.title > a'
_WRITER_SELECTOR = 'div.info > p > span > span.labeled-text > a'
_DESCRIPTION_SELECTOR = 'div.info > div.article-desc-brief'

# Seconds to wait for the server before giving up on a page.
_REQUEST_TIMEOUT = 10


def _extract_book(item):
    """Return (title, writers, description) for one book list item.

    ``item`` is the BeautifulSoup tag of a single ``<li>`` entry.  A field
    whose element is missing is returned as an empty string.  Several
    matching writer links are joined with ", " so that a multi-author book
    still produces exactly one row.
    """
    title = item.select_one(_TITLE_SELECTOR)
    description = item.select_one(_DESCRIPTION_SELECTOR)
    writers = item.select(_WRITER_SELECTOR)
    return (
        title.get_text() if title is not None else '',
        ', '.join(writer.get_text() for writer in writers),
        description.get_text() if description is not None else '',
    )


def main():
    """Fetch every listing page in ``urls`` and print one line per book.

    Raises whatever ``requests`` raises on a timeout, a connection problem
    or a non-success HTTP status, instead of silently printing nothing.
    """
    for url in urls:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        # Fail loudly on an error page rather than parsing it as a listing.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')
        for item in soup.select(_ITEM_SELECTOR):
            print('书名:%-30s作者:%-40s评价:%-40s' % _extract_book(item))


# Only scrape when run as a script, so importing the module stays free of
# network traffic.
if __name__ == '__main__':
    main()
# P.S. str.format combined with range() generates the URL of every listing page,
# so the books on all of the pages get printed.