# Scrape Douban book information with Selenium.
import csv

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.options import Options
def _split_info(info):
    """Split a "/"-separated info line into (writer, publication, year, price).

    Fields are taken positionally from the left, exactly as before, but each
    one is stripped of surrounding whitespace and missing trailing fields
    become empty strings instead of raising ``IndexError``.
    """
    parts = [part.strip() for part in info.split("/")]
    # Pad short listings so one incomplete entry cannot abort the whole run.
    parts += [""] * (4 - len(parts))
    writer, publication, year, price = parts[:4]
    # Drop the currency unit; strip again because the replacement leaves a
    # trailing space behind.
    return writer, publication, year, price.replace("元", " ").strip()


def all_bookonfo(driver, fp):
    """Scrape the Douban "中国文学" tag page and write one row per book.

    Args:
        driver: Browser driver providing ``get`` and ``find_elements``
            (the caller in this file passes a Selenium Edge WebDriver).
        fp: Writable text file object; receives one comma-separated line per
            book: name, writer, publication, year, price, rating.

    Each row is also echoed to stdout. Fields containing commas or quotes are
    quoted by ``csv.writer`` so the column layout stays parseable.
    """
    url = "https://book.douban.com/tag/%E4%B8%AD%E5%9B%BD%E6%96%87%E5%AD%A6"
    driver.get(url)
    items = driver.find_elements(By.XPATH, "//ul[@class='subject-list']/li/div[2]")
    # lineterminator="\n" keeps the original one-line-per-book format.
    rows = csv.writer(fp, lineterminator="\n")
    for item in items:
        bookname = item.find_element(By.XPATH, "./h2/a").text
        info = item.find_element(By.XPATH, "./div[1]").text
        writer, publication, year, price = _split_info(info)
        # find_elements returns an empty list instead of raising when the
        # rating span is absent, so such entries get an empty rating.
        rating_nodes = item.find_elements(By.XPATH, "./div[2]/span[2]")
        value = rating_nodes[0].text if rating_nodes else ""
        print(bookname, writer, publication, year, price, value)
        rows.writerow([bookname, writer, publication, year, price, value])
if __name__ == '__main__':
    # Script entry point: scrape with a headless Edge browser and store the
    # results in ./doubantushu.txt3.
    opt = Options()
    opt.add_argument("--headless")
    # Pass options by keyword: the first positional parameter of
    # webdriver.Edge has not always been ``options`` across Selenium releases.
    driver = webdriver.Edge(options=opt)
    try:
        # The context manager closes the output file even if scraping fails.
        with open("./doubantushu.txt3", "w+", encoding="utf-8") as fp:
            all_bookonfo(driver, fp)
    finally:
        # Always shut the browser down so no headless Edge process is left
        # running after the script exits or crashes.
        driver.quit()