面向对象改写:爬取某小说网站月票榜(指定月份)

下面用面向对象的方式改写某小说网站月票榜爬虫,可指定要查询的月份:


import json
import requests
from lxml import etree
from fake_useragent import FakeUserAgent
import re
from fontTools.ttLib import TTFont
import time
import datetime


class QiDian:
    """Scrape one page of the Qidian monthly-ticket ranking for a given month.

    The ticket counts on the page are rendered through a per-request web font,
    so each digit appears in the HTML as a numeric character reference. The
    class downloads the page, extracts titles and obfuscated ticket counts,
    downloads the font, translates the code points back to digits and appends
    the result to ``起点月票.json``.

    Args:
        month: Month segment inserted into the ranking URL (the caller passes
            a zero-padded two-digit string).
        page: Page number inserted into the ranking URL; also used in the
            progress messages.
    """

    # Glyph names found in the font's cmap -> the digit they stand for.
    _GLYPH_DIGITS = {
        'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4',
        'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9',
    }
    # Seconds to wait for an HTTP response before giving up, so a stalled
    # connection cannot hang the scraper forever.
    _REQUEST_TIMEOUT = 10

    def __init__(self, month, page):
        """Store the target month/page and build the URL and request headers."""
        self.month = month
        # Kept on the instance so the methods never depend on a module global.
        self.page = page
        self.url_ = f"https://www.qidian.com/rank/yuepiao/month{month}/page{page}/"
        # Request headers: random user agent plus a fixed cookie and referer.
        self.headers = {
            "user-agent": FakeUserAgent().random,
            "cookie": 'e2=%7B%22pid%22%3A%2 newstatisticUUID=1628952805_947624384; _csrfToken=t9wbkS56gV34qOb1QClNG; e1=%7B%22pid%22%3A%22qd_p_qidian%22%2C%22eid%22%3A%22qd_A16%22%2C%22l1%22%3A3%7D; e2=%7B%22pid',
            "referer": 'https://www.qidian.com/'
        }

    def send_requert(self):
        """Fetch the ranking page and return its body decoded as text."""
        response_ = requests.get(self.url_, headers=self.headers,
                                 timeout=self._REQUEST_TIMEOUT)
        return response_.content.decode()

    @staticmethod
    def _extract_encrypted_tickets(data_str):
        """Return the raw obfuscated ticket strings found in the page HTML."""
        return re.findall(
            r'</style><span class=".*?">(.*?)</span></span>月票</p>', data_str)

    @staticmethod
    def _decode_tickets(cmap_dict, mon_list):
        """Translate lists of code-point strings into plain ticket strings.

        Args:
            cmap_dict: Mapping of integer code point -> glyph name.
            mon_list: One list of decimal code-point strings per novel.

        Returns:
            One string per novel. Code points missing from ``cmap_dict`` are
            kept as-is, and glyph names that are not digits are kept by name.
            Neither argument is modified.
        """
        digits = QiDian._GLYPH_DIGITS
        # Key by str(code point) because the page gives the code points as text.
        lookup = {str(code): digits.get(name, name)
                  for code, name in cmap_dict.items()}
        return ["".join(lookup.get(code, code) for code in codes)
                for codes in mon_list]

    def parse_data(self, data_str, page):
        """Extract novel titles and obfuscated ticket counts from the HTML.

        Args:
            data_str: HTML text of the ranking page.
            page: Page number, used only in the progress message.

        Returns:
            A tuple ``(html_obj, title_list, mon_list)``: the parsed lxml tree,
            the list of titles, and one list of code-point strings per novel.
        """
        html_obj = etree.HTML(data_str)
        # Novel titles.
        title_list = html_obj.xpath('//h4/a[@target="_blank"]/text()')
        print(f"第{page}页小说名列表为:", len(title_list), title_list)

        # Obfuscated ticket counts, pulled out with a regular expression.
        mon_list_ = self._extract_encrypted_tickets(data_str)
        print("其加密月票源列表为:", len(mon_list_), mon_list_)
        # Keep only the numeric code points, dropping the entity punctuation.
        mon_list = [re.findall(r"\d+", raw) for raw in mon_list_]

        return html_obj, title_list, mon_list

    def decryption(self, html_obj, mon_list):
        """Decode the obfuscated ticket counts using the page's web font.

        Args:
            html_obj: Parsed lxml tree of the ranking page.
            mon_list: One list of code-point strings per novel.

        Returns:
            A list with one decoded ticket string per novel.

        Raises:
            IndexError: If the page contains no font ``<style>`` block or the
                woff URL cannot be found in it.
        """
        # 1. Locate the URL of the font file used by this particular response.
        woff_url_ = html_obj.xpath('//span/style/text()')[0]
        woff_url = re.findall(r"format\('eot'\); src: url\('(.*?)'\) format\('woff'\)", woff_url_)[0]
        print("其加密文件的实时url:", woff_url)

        # 2. Download the font and keep a copy on disk.
        response_woff = requests.get(woff_url, headers=self.headers,
                                     timeout=self._REQUEST_TIMEOUT)
        with open("月票加密_02.woff", "wb") as f:
            f.write(response_woff.content)
        print(f"<第{self.page}页的月票加密文件已经保存完毕>")

        # 3. Read the code point -> glyph name table (an XML dump is also
        #    written next to the font file).
        font_obj = TTFont("月票加密_02.woff")
        font_obj.saveXML("月票加密_02.xml")
        cmap_dict = font_obj.getBestCmap()

        # 4. Translate every code point and join the digits of each count.
        mon_ticket = self._decode_tickets(cmap_dict, mon_list)
        print("处理后最终的月票明文数据列表为:", len(mon_ticket), mon_ticket)
        return mon_ticket

    def save(self, title_list, mon_ticket):
        """Append ``{title: tickets}`` records to the output file.

        One JSON object followed by ``",\\n"`` is written per novel. After
        writing, the process exits if the page held fewer than 20 titles
        (treated as the last page); otherwise a separator is printed and the
        method sleeps briefly to lower the request rate.

        Args:
            title_list: Novel titles of the page.
            mon_ticket: Decoded ticket strings, parallel to ``title_list``.

        Raises:
            IndexError: If ``mon_ticket`` is shorter than ``title_list``.
            SystemExit: After saving a page with fewer than 20 titles.
        """
        with open("起点月票.json", "a", encoding="utf-8") as f:
            for index, title in enumerate(title_list):
                # Index access (not zip) so misaligned data fails loudly.
                record = {title: mon_ticket[index]}
                f.write(json.dumps(record, ensure_ascii=False) + ",\n")

        print(f"<第{self.page}页月票数据下载完毕!>")
        # A short page is taken to be the last one.
        if len(title_list) < 20:
            print("这已经是最后一页了!")
            raise SystemExit
        # Separator between pages.
        print("*" * 100)
        # Lower the request rate.
        time.sleep(1.5)

    def run(self):
        """Fetch, parse, decode and save the configured page."""
        data_str = self.send_requert()
        html_obj, title_list, mon_list = self.parse_data(data_str, self.page)
        mon_ticket = self.decryption(html_obj, mon_list)
        self.save(title_list, mon_ticket)


if __name__ == '__main__':

    # Ask for the month to query; the user gets at most three attempts.
    max_attempts = 3
    current_month = datetime.datetime.now().month
    for attempt in range(1, max_attempts + 1):
        try:
            month_ = int(input("请输入你要查询的月份:"))
        except ValueError:
            # Non-numeric input counts as a failed attempt instead of crashing.
            month_ = None
        # Accept only a real month that is not later than the current month.
        if month_ is not None and 1 <= month_ <= current_month:
            # The month is passed on as a zero-padded two-digit string.
            month = f"{month_:02d}"
            break
        if attempt == max_attempts:
            print("你的输入已错误3次!请下次尝试!")
            raise SystemExit
        print(f"你的输入已错误{attempt}次!还有{max_attempts - attempt}次机会!")

    # Ask how many pages to fetch, then scrape them in order.
    try:
        pages = int(input("请输入你要查询的页数:"))
    except ValueError:
        print("页数必须是整数!")
        raise SystemExit(1) from None
    # `page` stays a module-level name holding the 1-based page number as a
    # string, exactly the value handed to QiDian.
    for page in map(str, range(1, pages + 1)):
        qidian = QiDian(month, page)
        qidian.run()

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值