爬虫实例-爬取纵横小说网月票榜

最新推荐文章于 2023-10-10 14:55:16 发布

不设限。

最新推荐文章于 2023-10-10 14:55:16 发布

阅读量466

点赞数

文章标签： python Powered by 金山文档

本文链接：https://blog.csdn.net/m0_65267034/article/details/128982362

版权

# 1.导入模块
import requests
from lxml import etree
import xlwt

# Parallel result columns: index k of every list describes the same ranking row.
title, category, ticket, info, chapter = [], [], [], [], []

# 2. Send the request
# Base address of the ranking page; the page loop further down rebinds `url`.
url = 'https://www.zongheng.com/rank/details.html?rt=1&d=1&i=2'
# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/109.0.0.0 Safari/537.36'
    ),
}


# 获取列表里的文本
def get_first_name(list):
    """Return the first element of *list* with surrounding whitespace stripped.

    The script calls this on XPath ``text()`` results, which are empty when
    the targeted node is missing from the page.

    Args:
        list: A sequence whose first element is expected to be a string.
            The name shadows the builtin; it is kept unchanged so existing
            callers are unaffected.

    Returns:
        The stripped first element, or ``''`` when the sequence is empty or
        the first element cannot be stripped.
    """
    try:
        return list[0].strip()
    except Exception:
        # Best-effort lookup: any ordinary failure (empty sequence, None,
        # non-string element) yields an empty value. Unlike a bare
        # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        return ''


# Seconds to wait for the server before giving up. Without a timeout,
# requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# One URL per value of the `p` query parameter, 1 through 10
# (presumably the page number of the ranking).
urls = ['https://www.zongheng.com/rank/details.html?rt=1&d=1&i=2&p={}'.format(i) for i in range(1, 11)]
# Running counter: ends at (number of parsed entries + 1). Nothing in the
# visible script reads it; kept so the module-level name still exists.
count = 1
for url in urls:
    res = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    html = etree.HTML(res.text)
    # Every matching <div> is treated as one ranking entry.
    lis = html.xpath('//div[@class="rank_d_list borderB_c_dsh clearfix"]')
    # 3. Parse the data
    for entry in lis:
        # Append to all five lists on every entry so they stay index-aligned;
        # a missing node contributes '' via get_first_name.
        title.append(get_first_name(entry.xpath('./div[2]/div[1]/a/text()')))
        category.append(get_first_name(entry.xpath('./div[2]/div[2]/a[2]/text()')))
        ticket.append(get_first_name(entry.xpath('./div[3]/div/div[2]/text()')))
        info.append(get_first_name(entry.xpath('./div[2]/div[3]/text()')))
        chapter.append(get_first_name(entry.xpath('./div[2]/div[4]/a/text()')))

        count += 1

# 4. Store the data
# Write the collected columns to an .xls workbook: one header row, then one
# row per scraped entry with a 1-based sequence number in the first column.
try:
    work_book = xlwt.Workbook(encoding='utf-8')
    sheet = work_book.add_sheet('小说', cell_overwrite_ok=True)
    heads = ['序号', '小说名', '类别', '票数', '简介', '最新章节']
    for col, head in enumerate(heads):
        sheet.write(0, col, head)

    # The five lists are filled in lockstep, so zip pairs up matching rows.
    records = zip(title, category, ticket, info, chapter)
    for row_no, fields in enumerate(records, start=1):
        sheet.write(row_no, 0, row_no)
        for col, value in enumerate(fields, start=1):
            sheet.write(row_no, col, value)

    # Raw string: the backslashes are path separators, not escape sequences
    # ('\D' and '\小' are invalid escapes and trigger a warning otherwise).
    work_book.save(r'D:\Download\小说月票榜(升级版).xls')
    print('写入成功')
except Exception as exc:
    print('写入失败')
    # Report the cause instead of discarding it, e.g. a missing directory.
    print('{}: {}'.format(type(exc).__name__, exc))