# Object-oriented rewrite of a novel site's monthly-ticket scraper -- for a specified month.
import json
import requests
from lxml import etree
from fake_useragent import FakeUserAgent
import re
from fontTools.ttLib import TTFont
import time
import datetime
class QiDian:
    """Scrape one page of the qidian.com monthly-ticket ranking for one month.

    The ticket counts on the page are rendered through a per-response web
    font: the HTML contains numeric character codes and the WOFF font's cmap
    maps each code to a glyph named after a digit ("zero" ... "nine").  The
    workflow is: fetch the page, extract titles and encoded counts, download
    the font, decode the counts, append the results to a JSON-lines-like file.

    Args:
        month: Two-digit month string interpolated into the ranking URL.
        page: Page number (as given by the caller) interpolated into the URL
            and used in progress messages.
    """

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10
    # A page holding fewer titles than this is treated as the last page.
    PAGE_SIZE = 20

    # Captures the obfuscated character run that precedes the "月票" label.
    _TICKET_SPAN_RE = re.compile(
        r'</style><span class=".*?">(.*?)</span></span>月票</p>'
    )
    # Pulls the numeric character codes out of one obfuscated run.
    _CODE_RE = re.compile(r"\d+")
    # Extracts the WOFF url from the inline @font-face style text.
    _WOFF_URL_RE = re.compile(
        r"format\('eot'\); src: url\('(.*?)'\) format\('woff'\)"
    )
    # Glyph name -> digit character.
    _GLYPH_DIGITS = {
        'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4',
        'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9',
    }

    def __init__(self, month, page):
        """Build the ranking URL and the request headers."""
        # Kept on the instance so the methods below do not depend on a
        # module-level `page` variable existing in the caller's script.
        self.page = page
        self.url_ = f"https://www.qidian.com/rank/yuepiao/month{month}/page{page}/"
        # Request headers: random user agent, fixed cookie and referer.
        self.headers = {
            "user-agent": FakeUserAgent().random,
            "cookie": 'e2=%7B%22pid%22%3A%2 newstatisticUUID=1628952805_947624384; _csrfToken=t9wbkS56gV34qOb1QClNG; e1=%7B%22pid%22%3A%22qd_p_qidian%22%2C%22eid%22%3A%22qd_A16%22%2C%22l1%22%3A3%7D; e2=%7B%22pid',
            "referer": 'https://www.qidian.com/'
        }

    def send_requert(self):
        """Fetch the ranking page and return its body decoded as UTF-8 text.

        The (misspelled) method name is kept for backward compatibility.
        """
        response_ = requests.get(
            self.url_, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        return response_.content.decode()

    @classmethod
    def _extract_encrypted_tickets(cls, data_str):
        """Return ``(raw_runs, code_lists)`` found in the page text.

        ``raw_runs`` are the obfuscated strings exactly as matched;
        ``code_lists`` holds, for each run, the list of numeric codes in it.
        """
        raw_runs = cls._TICKET_SPAN_RE.findall(data_str)
        code_lists = [cls._CODE_RE.findall(run) for run in raw_runs]
        return raw_runs, code_lists

    @classmethod
    def _decode_tickets(cls, cmap_dict, mon_list):
        """Translate lists of character codes into plain ticket strings.

        Args:
            cmap_dict: Mapping of character code to glyph name.
            mon_list: One list of code strings per novel.

        Returns:
            One string per novel.  Glyph names that are not digit names and
            codes missing from ``cmap_dict`` are passed through unchanged.
            ``mon_list`` itself is not modified.
        """
        code_to_text = {
            str(code): cls._GLYPH_DIGITS.get(glyph, glyph)
            for code, glyph in cmap_dict.items()
        }
        return [
            "".join(code_to_text.get(code, code) for code in codes)
            for codes in mon_list
        ]

    def parse_data(self, data_str, page):
        """Extract novel titles and the encoded ticket codes from the page.

        Args:
            data_str: Page HTML as text.
            page: Page label used only in the progress message.

        Returns:
            ``(html_obj, title_list, mon_list)``: the parsed lxml tree, the
            list of titles, and one list of code strings per novel.
        """
        html_obj = etree.HTML(data_str)
        # Novel titles.
        title_list = html_obj.xpath('//h4/a[@target="_blank"]/text()')
        print(f"第{page}页小说名列表为:", len(title_list), title_list)
        # Encoded monthly tickets, taken from the raw text by regex.
        raw_runs, mon_list = self._extract_encrypted_tickets(data_str)
        print("其加密月票源列表为:", len(raw_runs), raw_runs)
        return html_obj, title_list, mon_list

    def decryption(self, html_obj, mon_list):
        """Download the page's font and decode the encoded ticket counts.

        Side effects: writes ``月票加密_02.woff`` and ``月票加密_02.xml`` to the
        current directory (overwritten on every call).

        Raises:
            IndexError: if the inline font style or the WOFF url is not
                found in the page.
        """
        # 1. Locate the url of the font used by this particular response.
        style_text = html_obj.xpath('//span/style/text()')[0]
        woff_url = self._WOFF_URL_RE.findall(style_text)[0]
        print("其加密文件的实时url:", woff_url)
        # 2. Download and store the font file.
        response_woff = requests.get(
            woff_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        with open("月票加密_02.woff", "wb") as f:
            f.write(response_woff.content)
        print(f"<第{self.page}页的月票加密文件已经保存完毕>")
        # 3. Read the code -> glyph-name table; the XML dump is kept as a
        #    human-readable copy of the font.
        font_obj = TTFont("月票加密_02.woff")
        font_obj.saveXML("月票加密_02.xml")
        cmap_dict = font_obj.getBestCmap()
        # 4. Decode and join the digits of every ticket count.
        mon_ticket = self._decode_tickets(cmap_dict, mon_list)
        print("处理后最终的月票明文数据列表为:", len(mon_ticket), mon_ticket)
        return mon_ticket

    def save(self, title_list, mon_ticket):
        """Append ``{title: tickets}`` records to ``起点月票.json``.

        One JSON object is written per line, each followed by ``,``.  After
        writing, the process exits (``SystemExit``) when the page held fewer
        than ``PAGE_SIZE`` titles; otherwise a separator is printed and the
        method sleeps briefly to lower the request rate.

        Raises:
            IndexError: if ``mon_ticket`` is shorter than ``title_list``.
        """
        with open("起点月票.json", "a", encoding="utf-8") as f:
            for index, title in enumerate(title_list):
                record = {title: mon_ticket[index]}
                f.write(json.dumps(record, ensure_ascii=False) + ",\n")
        print(f"<第{self.page}页月票数据下载完毕!>")
        # A short page means there is nothing further to fetch.
        if len(title_list) < self.PAGE_SIZE:
            print("这已经是最后一页了!")
            raise SystemExit
        # Separator between pages.
        print("*" * 100)
        # Lower the request rate.
        time.sleep(1.5)

    def run(self):
        """Fetch, parse, decode and save this instance's page."""
        data_str = self.send_requert()
        html_obj, title_list, mon_list = self.parse_data(data_str, self.page)
        mon_ticket = self.decryption(html_obj, mon_list)
        self.save(title_list, mon_ticket)
def prompt_month(max_attempts=3, read=input):
    """Ask the user for a month and return it as a two-digit string.

    An answer is accepted when it is an integer between 1 and the current
    calendar month (inclusive).  Anything else, including non-numeric
    text, counts as a failed attempt.

    Args:
        max_attempts: Number of answers the user may give.
        read: Callable taking the prompt and returning the answer text;
            defaults to the builtin ``input``.

    Returns:
        The accepted month formatted with a leading zero, e.g. ``"03"``.

    Raises:
        SystemExit: after ``max_attempts`` rejected answers.
    """
    current_month = datetime.datetime.now().month
    for attempt in range(1, max_attempts + 1):
        try:
            month_number = int(read("请输入你要查询的月份:"))
        except ValueError:
            month_number = None
        if month_number is not None and 1 <= month_number <= current_month:
            return f"{month_number:02d}"
        if attempt == max_attempts:
            print(f"你的输入已错误{max_attempts}次!请下次尝试!")
            raise SystemExit
        print(f"你的输入已错误{attempt}次!还有{max_attempts - attempt}次机会!")


if __name__ == '__main__':
    # Choose the month (a single month, not a range).
    month = prompt_month()
    # Paging: fetch pages 1..pages in order.
    pages = int(input("请输入你要查询的页数:"))
    # NOTE: `page` is deliberately bound at module level, since code
    # elsewhere in this file may read it as a global.
    for page_number in range(1, pages + 1):
        page = str(page_number)
        qidian = QiDian(month, page)
        qidian.run()