爬虫[当当(6000条数据)XPATH]

import requests
from lxml import etree
import csv

# Accumulates one dict per scraped book across every page.
book_list = []

# Request headers are loop-invariant — build them once, outside the loop.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
}

# Iterate over result pages 1..99 of the Dangdang category listing.
for i in range(1, 100, 1):
    # BUG FIX: the original URL string began with a stray space
    # (" http://..."), which makes requests raise InvalidURL/MissingSchema.
    # The scheme must start at the first character.
    url = f"http://category.dangdang.com/pg{i}-cp01.05.06.00.00.00-srsort_sale_amt_desc.html"

    # timeout prevents the scraper from hanging forever on a dead connection.
    response = requests.get(url=url, headers=headers, timeout=10)

    book = etree.HTML(response.text)

    # One <li> node per book in this page's result list.
    # NOTE(review): absolute XPath is brittle against page-layout changes —
    # confirm it still matches the live markup.
    books = book.xpath('/html/body/div[2]/div/div[3]/div[1]/div[1]/div[2]/div/ul/li')
    for j in books:
        a = {}
        # Each xpath() call returns a (possibly empty) list of text nodes.
        a["书名"] = j.xpath("./p[1]/a/text()")
        a["价格"] = j.xpath("./p[3]/span[1]/text()")

        # Author / publisher / blurb can be absent; substitute a placeholder
        # so every row carries the same keys for csv.DictWriter.
        author = j.xpath("./p[5]/span[1]/a/text()")
        if not author:
            author = ["空"]
        a["作者"] = author

        press = j.xpath("./p[5]/span[3]/a/text()")
        if not press:
            press = ["NULL"]
        a["出版社"] = press

        a["评论"] = j.xpath("./p[2]/a/text()")

        information = j.xpath("./p[2]/text()")
        if not information:
            information = ["NULL"]
        a["介绍"] = information

        book_list.append(a)
    print(f"第{i}页爬取完成!")

# Save all scraped rows to CSV. A context manager guarantees the file is
# closed (and its buffers flushed) even if writing raises — the original
# opened the file and never closed it, so a crash could truncate output.
with open("当当文集.csv", "w", encoding="utf-8", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["书名", "价格", "作者", "出版社", "评论", "介绍"])
    writer.writeheader()
    writer.writerows(book_list)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43,前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

凉城姑娘

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值