# 1.导入模块
import requests
from lxml import etree
import xlwt
# Column accumulators: five independent, initially empty lists that the
# scraping loop in this script appends to and the export step reads back.
title, category, ticket, info, chapter = [], [], [], [], []

# 2. Request settings
# Ranking page address (the per-page loop in this script rebinds `url`).
url = 'https://www.zongheng.com/rank/details.html?rt=1&d=1&i=2'
# Headers sent with every request: a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/109.0.0.0 Safari/537.36'
    ),
}
# Get the text of the first item in a list
def get_first_name(list):
    """Return the first element of ``list`` with surrounding whitespace stripped.

    Args:
        list: An indexable collection whose first element is a string (in
            this script, the result of an lxml ``xpath(...)`` text query).
            The name shadows the builtin but is kept unchanged so existing
            keyword callers keep working.

    Returns:
        The stripped first element, or ``''`` when there is no usable first
        element: the collection is empty, is not indexable, or its first
        element has no ``strip`` method.
    """
    try:
        return list[0].strip()
    except (LookupError, TypeError, AttributeError):
        # Only "no usable first element" failures fall back to ''. Anything
        # else (KeyboardInterrupt, SystemExit, ...) must propagate instead of
        # being silently swallowed as a bare ``except:`` would do.
        return ''
# Addresses of the ranking pages: the `p` query parameter takes 1 through 10.
urls = [
    'https://www.zongheng.com/rank/details.html?rt=1&d=1&i=2&p={}'.format(i)
    for i in range(1, 11)
]
# Running counter, starting at 1; nothing in the visible script reads it.
count = 1
for url in urls:
    # The timeout keeps a single stalled connection from hanging the whole
    # run; requests waits indefinitely when no timeout is given.
    res = requests.get(url=url, headers=headers, timeout=10)
    html = etree.HTML(res.text)
    # Each matching <div> is one entry of the ranking list.
    lis = html.xpath('//div[@class="rank_d_list borderB_c_dsh clearfix"]')
    # 3. Parse data
    for entry in lis:
        # get_first_name yields '' for a missing field, so the five lists
        # always grow together and stay index-aligned.
        title.append(get_first_name(entry.xpath('./div[2]/div[1]/a/text()')))
        category.append(get_first_name(entry.xpath('./div[2]/div[2]/a[2]/text()')))
        ticket.append(get_first_name(entry.xpath('./div[3]/div/div[2]/text()')))
        info.append(get_first_name(entry.xpath('./div[2]/div[3]/text()')))
        chapter.append(get_first_name(entry.xpath('./div[2]/div[4]/a/text()')))
        # NOTE(review): the original indentation was lost; this assumes the
        # counter advances once per entry rather than once per page - confirm.
        count += 1
# 4. Store data
# Write the collected columns into a single-sheet .xls workbook. Any failure
# is reported on stdout instead of aborting the script.
try:
    work_book = xlwt.Workbook(encoding='utf-8')
    sheet = work_book.add_sheet('小说', cell_overwrite_ok=True)
    heads = ['序号', '小说名', '类别', '票数', '简介', '最新章节']
    # Row 0 holds the column headers.
    for col, head in enumerate(heads):
        sheet.write(0, col, head)
    # One row per scraped entry. The five lists are appended to in lockstep
    # by the scraping loop, so zipping them drops nothing.
    records = zip(title, category, ticket, info, chapter)
    for row, fields in enumerate(records, start=1):
        # Column 0 is the 1-based sequence number, which equals the row index.
        sheet.write(row, 0, row)
        for col, value in enumerate(fields, start=1):
            sheet.write(row, col, value)
    # Raw string: '\D' and '\小' are invalid escape sequences in a normal
    # literal. The resulting path is unchanged.
    work_book.save(r'D:\Download\小说月票榜(升级版).xls')
    print('写入成功')
except Exception as exc:
    print('写入失败')
    # Report the cause so a bad path or permission problem can be diagnosed.
    print(repr(exc))
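# 爬虫实例-爬取纵横小说网月票榜 (Crawler example: scraping the Zongheng novel site's monthly-ticket ranking)
# 最新推荐文章于 2023-10-10 14:55:16 发布 (source-page footer: latest recommended article published 2023-10-10 14:55:16)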