import requests
from bs4 import BeautifulSoup
import json
import os
# Base address of the Douban book Top 250 listing that getHtml requests.
url = "https://book.douban.com/top250"

# HTTP headers passed to requests.get; the User-Agent string identifies the
# client as desktop Chrome 112 on Windows 10.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/112.0.0.0 Safari/537.36"
    ),
}
def getHtml(num, timeout=10):
    """Fetch one page of the listing at ``url`` and return its body as text.

    Args:
        num: Value sent as the ``start`` query parameter of the request.
        timeout: Seconds to wait for the server before giving up. Defaults
            to 10; pass ``None`` to wait indefinitely (the previous
            behaviour).

    Returns:
        The response body decoded to ``str`` (``Response.text``). The HTTP
        status code is not checked, so the body of an error response is
        returned as-is.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # Without an explicit timeout, requests can block forever on a stalled
    # connection, hanging the whole script.
    response = requests.get(
        url,
        headers=header,
        params={"start": num},
        timeout=timeout,
    )
    return response.text
# Runs when the module is executed/imported: download the page requested
# with start=0 and keep its markup in the module-level name `html`.
html = getHtml(num=0)
def getPrintData(html):
soup = BeautifulSoup(html,"lxml")
books = soup.select("tr")
for book in books:
tds=book.select("td")
print("书名:",tds[1].div.a.text.strip().split("\n")