Python爬虫学习(七)执行定时任务爬取12306余票信息发送邮件通知

分析

在这里插入图片描述

我们输入的上海,北京都变成了对应的编号,比如,上海(SHH)、北京(BJP),所以当我们程序进行输入的时候要进行一下处理,12306的一个地方存储着这些城市名与编码对应的文档:

GET请求: https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.8971

在这里插入图片描述
点击查询,通过F12找到接口:
在这里插入图片描述

GET请求
Request URL: https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2020-12-31&leftTicketDTO.from_station=SHH&leftTicketDTO.to_station=BXP&purpose_codes=ADULT
leftTicketDTO.train_date:查询的日期       2020-12-31
leftTicketDTO.from_station:查询的出发地   SHH上海
leftTicketDTO.to_station:查询的目的地     BXP北京西  郑州ZZF
purpose_codes:乘客类型,ADULT 表示成人票,这里保持默认值即可

在这里插入图片描述

整体流程:
执行定时任务,查询12306余票信息,如果有合适的车次,就发送邮件通知。

1、查询余票信息

查询城市编码与城市名称对应信息。

# {'VAP': '北京北'}
def getCityCode1():
    """Download the 12306 station list and return a code -> name mapping.

    Returns:
        dict: station code to Chinese station name, e.g. ``{'VAP': '北京北'}``.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status.
    """
    url = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.8971"
    # NOTE(review): verify=False (kept from the original) skips TLS
    # certificate validation. The timeout stops a stalled connection from
    # blocking the caller indefinitely.
    response = requests.get(url, verify=False, timeout=10)
    response.raise_for_status()
    # The regex picks out every "<Chinese name>|<UPPERCASE code>" pair.
    pairs = re.findall(r'([\u4e00-\u9fa5]+)\|([A-Z]+)', response.text)
    name_to_code = dict(pairs)
    # Flip the mapping so it can be looked up by station code.
    chezhan_names = {code: name for name, code in name_to_code.items()}
    # Print the resulting station dictionary.
    print(chezhan_names)  # {'VAP': '北京北', 'BOP': '北京东',
    return chezhan_names
# {'北京北':'VAP'}
def getCityCode2():
    """Download the 12306 station list and return a name -> code mapping.

    Returns:
        dict: Chinese station name to station code, e.g. ``{'北京北': 'VAP'}``.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status.
    """
    url = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.8971"
    # NOTE(review): verify=False (kept from the original) skips TLS
    # certificate validation. The timeout stops a stalled connection from
    # blocking the caller indefinitely.
    response = requests.get(url, verify=False, timeout=10)
    response.raise_for_status()
    # The regex picks out every "<Chinese name>|<UPPERCASE code>" pair; the
    # pairs are already in (name, code) order, so no swap is needed.
    chezhan_names = dict(re.findall(r'([\u4e00-\u9fa5]+)\|([A-Z]+)', response.text))
    # Print the resulting station dictionary.
    print('getCityCode2()',chezhan_names)
    return chezhan_names

解析余票信息数据


def getdata(html):
    """Parse a leftTicket/query JSON response into a list of per-train dicts.

    Args:
        html: JSON text of the query response; ``data.result`` is iterated
            as a list of ``|``-separated train records.

    Returns:
        list[dict]: one dict per train that is kept, keyed by the Chinese
        column titles used for the CSV header. Empty fields become ``"-"``.
        A train is dropped when neither its hard sleeper nor its hard seat
        field shows anything other than ``"-"`` (empty) or ``"无"`` (none).
    """
    # Column title -> position of that field in the "|"-separated record.
    fields = {
        " 车次 ": 3,
        "出发车站": 6,
        "到达车站": 7,
        "出发时间": 8,
        "到达时间": 9,
        " 历时 ": 10,
        "商务座": 32,
        "一等座": 31,
        "二等座": 30,
        "高级软卧": 21,
        "软卧": 23,
        "动卧": 27,
        "硬卧": 28,
        "软座": 24,
        "硬座": 29,
        "无座": 26,
        "其他": 22,
        "备注": 1,
    }
    unavailable = ("-", "无")
    chezhan_names = getCityCode1()
    ticketList = []
    for record in json.loads(html)['data']['result']:
        item = record.split('|')
        # Missing values are shown as "-".
        data = {title: item[pos] or "-" for title, pos in fields.items()}
        # Business class is read from field 32 or, when that is empty, field 25.
        data["商务座"] = item[32] or item[25] or "-"
        # Translate station codes to names; an unknown code is kept as-is
        # instead of aborting the whole parse with a KeyError.
        for title, pos in (("出发车站", 6), ("到达车站", 7)):
            data[title] = chezhan_names.get(item[pos], item[pos]) or "-"
        # Skip the train unless hard sleeper or hard seat has something on
        # offer. Mixed "-"/"无" combinations are also skipped; previously
        # they slipped through and were reported as available.
        if data["硬卧"] in unavailable and data["硬座"] in unavailable:
            continue
        ticketList.append(data)
    return ticketList

保存数据为csv文件

# Save the data
def writeData(ticketList):
    """Write the train dicts to a CSV file named ``<date><from>-><to>.csv``.

    The file name is assembled from the module-level ``date``,
    ``from_station`` and ``to_station`` variables, so these must be defined
    before this function is called.

    Args:
        ticketList: iterable of dicts keyed by the column titles below.
    """
    columns = [" 车次 ","出发车站","到达车站","出发时间","到达时间"," 历时 ","商务座","一等座","二等座","高级软卧","软卧","动卧","硬卧","软座","硬座","无座","其他","备注"]
    csv_name = date+from_station+"->"+to_station+'.csv'
    with open(csv_name, 'w', encoding='utf-8', newline='') as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()  # header row
        sheet.writerows(ticketList)  # one row per train
if __name__ == '__main__':
    # Travel date
    date = "2020-12-15"
    # Departure station
    from_station = "秦皇岛"
    # Arrival station
    to_station = "郑州"
    print(from_station)
    print(to_station)
    # Download the station table once and reuse it for both lookups instead
    # of issuing a separate request per station.
    station_codes = getCityCode2()
    from_station1 = station_codes[from_station]
    to_station1 = station_codes[to_station]
    print("出发城市:",from_station)
    print("到达城市:",to_station)
    query_url = url2.format(date, from_station1, to_station1)
    print("url:",query_url)
    response = requests.get(url=query_url, headers=headers)
    response.raise_for_status()  # raise on an HTTP error status
    response.encoding = response.apparent_encoding
    print("status_code:",response.status_code)
    selector = etree.HTML(response.text)
    try:
        # The error page carries its message in <li id="err_bot">; when that
        # element is absent the [0] lookup raises IndexError.
        msg1 = selector.xpath('//li[@id="err_bot"]/text()')[0]
        msg = msg1.strip()
        if msg == '网络可能存在问题,请您重试一下!':
            raise ValueError('网络可能存在问题,或者当前日期没有车次信息,请您重试一下')
    except IndexError:
        print('接口正常!')
    print("获取车次信息")
    ticketList = getdata(response.text)
    print("保存车次信息")
    writeData(ticketList)
    print("ticketList:",ticketList)
    # Keep only the columns that go into the mail body. The result list no
    # longer shadows the builtin ``list``.
    copied_columns = ['出发车站', '到达车站', '出发时间', '到达时间', '硬卧', '硬座']
    summaries = []
    for item in ticketList:
        d = {'车次': item[' 车次 ']}
        d.update((key, item[key]) for key in copied_columns)
        summaries.append(d)
    if summaries:
        # Crude formatting: every closing brace becomes a line break.
        list_str2 = str(summaries).replace('}','\n')
        info = '日期:'+date+'\n'+list_str2
        print(info)
        SendeMail.main.sendTextEmail('从'+from_station+'前往'+to_station+'的',info)
    else:
        print('没有车次信息')
   

效果

在这里插入图片描述

2、发送邮件通知

# author: LiuShihao
# data: 2020/12/7 2:57 下午
# youknow: 各位老铁,我的这套代码曾经有人出价三个亿我没有卖,如今拿出来和大家分享,不求别的,只求大家免费的小红心帮忙点一点,这里谢过了。
# desc: 发送邮件

import smtplib
from email.mime.text import MIMEText


# Third-party SMTP service
mail_host = "smtp.qq.com"  # SMTP server
mail_user = "*******@qq.com"  # user name
mail_pass = "******"  # password (the mailbox authorization code, not the login password)

sender = '******@qq.com'  # sender address
receivers = ['******@qq.com','******@qq.com','******@qq.com']  # recipient addresses
"""
title标题:从上海前往郑州12306车票信息
info正文:
"""
# 1. Send a plain-text e-mail with Python
def sendTextEmail(title,info):
    """Send a plain-text notification mail to every address in ``receivers``.

    Args:
        title: subject prefix; ``'12306车票信息'`` is appended to it.
        info: message text; a "seats available" reminder line is appended.

    SMTP errors are printed, not raised.
    """
    content = info+'\n有座位!请尽快购票'
    title = title+'12306车票信息'  # mail subject
    message = MIMEText(content, 'plain', 'utf-8')  # body, subtype, charset
    message['From'] = "{}".format(sender)
    message['To'] = ",".join(receivers)
    message['Subject'] = title

    try:
        # Connect over SSL on port 465. The context manager sends QUIT and
        # closes the socket even when login or sending fails, so the
        # connection is no longer leaked.
        with smtplib.SMTP_SSL(mail_host, 465) as smtpObj:
            smtpObj.login(mail_user, mail_pass)  # authenticate
            smtpObj.sendmail(sender, receivers, message.as_string())  # send
        print("mail has been send successfully.")
    except smtplib.SMTPException as e:
        print(e)


if __name__ == '__main__':
    # Manual check: send a single mail with a sample subject prefix and body.
    sendTextEmail('从北京前往的上海','Text')

效果

在这里插入图片描述

3、定时任务

将查询余票信息的逻辑封装成 GET(date, from_station, to_station) 函数,再导入 schedule 模块按固定间隔调用它

import schedule
if __name__ == '__main__':

    # Travel date
    date = "2020-12-15"
    # Departure station
    from_station = "秦皇岛"
    # Arrival station
    to_station = "郑州"
    # Run one query immediately, before the schedule takes over
    GET(date,from_station,to_station)
    # Run once every hour
    # schedule.every().hour.do(GET,date,from_station,to_station)
    # Run once every minute
    schedule.every().minute.do(GET,date,from_station,to_station)
    # Poll the scheduler every 2 seconds so that due jobs get executed
    while True:
      schedule.run_pending()
      time.sleep(2)

在这里插入图片描述
在这里插入图片描述

完整代码

# author: LiuShihao
# data: 2020/12/7 9:20 上午
# youknow: 各位老铁,我的这套代码曾经有人出价三个亿我没有卖,如今拿出来和大家分享,不求别的,只求大家免费的小红心帮忙点一点,这里谢过了。
# desc: 查询余票信息

import requests
import json
import re
from colorama import init,Fore
from prettytable import PrettyTable
from lxml import etree
import traceback
import csv
import SendeMail.main
import schedule
import time
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


# Initialise colorama with autoreset disabled
init(autoreset=False)
"""
leftTicketDTO.train_date:查询的日期       2020-12-31
leftTicketDTO.from_station:查询的出发地   SHH上海
leftTicketDTO.to_station:查询的目的地     BXP北京  郑州ZZF
purpose_codes:不太清楚这个字段是用来做什么的,就默认吧

可以从我们递交的URL请求看出,我们输入的成都,深圳都变成了对应的编号,比如,成都(CDW)、深圳(SZQ),所以当我们程序进行输入的时候要进行一下处理,12306的一个地方存储着这些城市名与编码对应的文档:
https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.8971
"""

class Colored(object):
    """Wrap text in a colorama foreground colour code followed by a reset."""

    def _paint(self, tint, s):
        # Colour code first, then the text, then the reset code.
        return tint + s + Fore.RESET

    def red(self, s):
        """Return *s* in light red."""
        return self._paint(Fore.LIGHTRED_EX, s)

    def green(self, s):
        """Return *s* in light green."""
        return self._paint(Fore.LIGHTGREEN_EX, s)

    def yellow(self, s):
        """Return *s* in light yellow."""
        return self._paint(Fore.LIGHTYELLOW_EX, s)

    def white(self, s):
        """Return *s* in light white."""
        return self._paint(Fore.LIGHTWHITE_EX, s)

    def blue(self, s):
        """Return *s* in light blue."""
        return self._paint(Fore.LIGHTBLUE_EX, s)
# Path to a chromedriver binary; not referenced by the code visible here
# (presumably left over from a Selenium-based version - TODO confirm).
driverfile_path = r'/Browser/Baidu/chromedriver'
# Query URL template: {0} = travel date, {1} = departure station code,
# {2} = arrival station code.
url2= r'https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date={0}&leftTicketDTO.from_station={1}&leftTicketDTO.to_station={2}&purpose_codes=ADULT'
# Request headers sent with the ticket query.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.5702.400 QQBrowser/10.2.1893.400",
    # Without the cookie, 12306 rejects the API call and reports a network error
    "Cookie": "JSESSIONID=E9290F20C37A2BFA56C8F0CF732C7D05; BIGipServerpool_passport=266600970.50215.0000; RAIL_EXPIRATION=1607578913111; RAIL_DEVICEID=nK0qSNOKRXI4lxn6fPsRqT-rZdcQUhNIENZwmddIyDzW_auv2mnP-rktmCKFBKvqYVPM3VCb4q0IyoQDn5BC1M0a7XMAbJtEvcOsFHv-Qx6okcTqkXTRYWoX6_t9T20ny_DGckI5sAx7jIrBs8WLEOF7KmxuH27v; route=9036359bb8a8a461c164a04f8f50b252; _jc_save_fromStation=%u4E0A%u6D77%2CSHH; _jc_save_fromDate=2020-12-31; _jc_save_toDate=2020-12-07; _jc_save_wfdc_flag=dc; _jc_save_toStation=%u90D1%u5DDE%2CZZF; BIGipServerotn=233832970.64545.0000"
  }


def getCity():
    """Download the 12306 station list and return a name -> code mapping.

    Returns:
        dict: Chinese station name to station code.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status.
    """
    url = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.8971"
    # NOTE(review): verify=False (kept from the original) skips TLS
    # certificate validation. The timeout stops a stalled connection from
    # blocking the scheduler loop indefinitely.
    response = requests.get(url, verify=False, timeout=10)
    # Fail loudly on an HTTP error instead of returning an empty table.
    response.raise_for_status()
    # The regex picks out every "<Chinese name>|<UPPERCASE code>" pair.
    chezhan = re.findall(r'([\u4e00-\u9fa5]+)\|([A-Z]+)', response.text)
    return dict(chezhan)

# {'VAP': '北京北'}
def getCityCode1():
    """Return the station table keyed by station code."""
    name_to_code = getCity()
    # Flip the name -> code mapping so it can be looked up by code.
    return {code: name for name, code in name_to_code.items()}
# {'北京北':'VAP'}
def getCityCode2():
    """Return a fresh copy of the station table keyed by station name."""
    return {name: code for name, code in getCity().items()}
# Parse the data
def getdata(html):
    """Parse a leftTicket/query JSON response into a list of per-train dicts.

    Args:
        html: JSON text of the query response; ``data.result`` is iterated
            as a list of ``|``-separated train records.

    Returns:
        list[dict]: one dict per train that is kept, keyed by the Chinese
        column titles used for the CSV header. Empty fields become ``"-"``.
        A train is dropped when neither its hard sleeper nor its hard seat
        field shows anything other than ``"-"`` (empty) or ``"无"`` (none).
    """
    # Column title -> position of that field in the "|"-separated record.
    fields = {
        " 车次 ": 3,
        "出发车站": 6,
        "到达车站": 7,
        "出发时间": 8,
        "到达时间": 9,
        " 历时 ": 10,
        "商务座": 32,
        "一等座": 31,
        "二等座": 30,
        "高级软卧": 21,
        "软卧": 23,
        "动卧": 27,
        "硬卧": 28,
        "软座": 24,
        "硬座": 29,
        "无座": 26,
        "其他": 22,
        "备注": 1,
    }
    unavailable = ("-", "无")
    chezhan_names = getCityCode1()
    ticketList = []
    for record in json.loads(html)['data']['result']:
        item = record.split('|')
        # Missing values are shown as "-".
        data = {title: item[pos] or "-" for title, pos in fields.items()}
        # Business class is read from field 32 or, when that is empty, field 25.
        data["商务座"] = item[32] or item[25] or "-"
        # Translate station codes to names; an unknown code is kept as-is
        # instead of aborting the whole parse with a KeyError.
        for title, pos in (("出发车站", 6), ("到达车站", 7)):
            data[title] = chezhan_names.get(item[pos], item[pos]) or "-"
        # Skip the train unless hard sleeper or hard seat has something on
        # offer. Mixed "-"/"无" combinations are also skipped; previously
        # they slipped through and were reported as available.
        if data["硬卧"] in unavailable and data["硬座"] in unavailable:
            continue
        ticketList.append(data)
    return ticketList
# Print the data to the console
def showTicket(html):
    """Print every train of a query response as a coloured console table.

    Args:
        html: JSON text of the leftTicket/query response; ``data.result`` is
            iterated as a list of ``|``-separated train records.
    """
    # (table heading, position in the "|"-separated record), in display
    # order. The premium soft sleeper (21), soft sleeper (23) and EMU sleeper
    # (27) fields are now listed in the same order as their headings;
    # previously they were emitted as 27, 21, 23 and so appeared under the
    # wrong titles.
    columns = [
        (" 车次 ", 3),
        ("出发车站", 6),
        ("到达车站", 7),
        ("出发时间", 8),
        ("到达时间", 9),
        (" 历时 ", 10),
        ("商务座", 32),
        (" 一等座", 31),
        ("二等座", 30),
        ("高级软卧", 21),
        ("软卧", 23),
        ("动卧", 27),
        ("硬卧", 28),
        ("软座", 24),
        ("硬座", 29),
        ("无座", 26),
        ("其他", 22),
        ("备注", 1),
    ]
    chezhan_names = getCityCode1()
    color = Colored()
    table = PrettyTable([heading for heading, _ in columns])
    for record in json.loads(html)['data']['result']:
        item = record.split('|')
        # Missing values are shown as "-".
        cells = {heading: item[pos] or "-" for heading, pos in columns}
        # Business class is read from field 32 or, when that is empty, field 25.
        cells["商务座"] = item[32] or item[25] or "-"
        cells[" 车次 "] = color.yellow(cells[" 车次 "])
        # Departure details in green, arrival details in red. A station code
        # missing from the table is shown as-is instead of raising KeyError.
        cells["出发车站"] = color.green(chezhan_names.get(cells["出发车站"], cells["出发车站"]))
        cells["到达车站"] = color.red(chezhan_names.get(cells["到达车站"], cells["到达车站"]))
        cells["出发时间"] = color.green(cells["出发时间"])
        cells["到达时间"] = color.red(cells["到达时间"])
        # The remark is always coloured from the raw field, with no "-" placeholder.
        cells["备注"] = color.white(item[1])
        table.add_row(list(cells.values()))
    print(table)
# Save the data
def writeData(ticketList):
    """Write the train dicts to a CSV file named ``<date><from>-><to>.csv``.

    The file name is assembled from the module-level ``date``,
    ``from_station`` and ``to_station`` variables, so these must be defined
    before this function is called.

    Args:
        ticketList: iterable of dicts keyed by the column titles below.
    """
    columns = [" 车次 ","出发车站","到达车站","出发时间","到达时间"," 历时 ","商务座","一等座","二等座","高级软卧","软卧","动卧","硬卧","软座","硬座","无座","其他","备注"]
    csv_name = date+from_station+"->"+to_station+'.csv'
    with open(csv_name, 'w', encoding='utf-8', newline='') as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()  # header row
        sheet.writerows(ticketList)  # one row per train
def GET(date,from_station,to_station):
    """Query remaining tickets once, save them to CSV and e-mail any matches.

    Args:
        date: travel date string substituted into the query URL.
        from_station: departure station name (a key of the station table).
        to_station: arrival station name (a key of the station table).

    Raises:
        KeyError: if a station name is not in the station table.
        requests.HTTPError: if the query returns an HTTP error status.
        ValueError: if the response carries the 12306 "network problem" text.
    """
    print("现在是", time.strftime('%Y年%m月%d日 %H时%M分%S秒', time.localtime()),'查询余票信息',)
    # Download the station table once and reuse it for both lookups instead
    # of issuing a separate request per station.
    station_codes = getCityCode2()
    from_station1 = station_codes[from_station]
    to_station1 = station_codes[to_station]
    response = requests.get(url=url2.format(date, from_station1, to_station1), headers=headers,verify=False)
    response.raise_for_status()  # raise on an HTTP error status
    response.encoding = response.apparent_encoding
    print("status_code:", response.status_code)
    selector = etree.HTML(response.text)
    try:
        # The error page carries its message in <li id="err_bot">; when that
        # element is absent the [0] lookup raises IndexError.
        msg1 = selector.xpath('//li[@id="err_bot"]/text()')[0]
        msg = msg1.strip()
        if msg == '网络可能存在问题,请您重试一下!':
            raise ValueError('网络可能存在问题,或者当前日期没有车次信息,请您重试一下')
    except IndexError:
        print('接口正常!')
    print("获取车次信息")
    ticketList = getdata(response.text)
    print("保存车次信息")
    # NOTE: writeData() builds its file name from the module-level date /
    # from_station / to_station variables, not from this function's arguments.
    writeData(ticketList)
    # Keep only the columns that go into the mail body. The result list no
    # longer shadows the builtin ``list``.
    copied_columns = ['出发车站', '到达车站', '出发时间', '到达时间', '硬卧', '硬座']
    summaries = []
    for item in ticketList:
        d = {'车次': item[' 车次 ']}
        d.update((key, item[key]) for key in copied_columns)
        summaries.append(d)
    if summaries:
        # Crude formatting: every closing brace becomes a line break.
        list_str2 = str(summaries).replace('}', '\n')
        info = '日期:' + date + '\n' + list_str2
        print(info)
        SendeMail.main.sendTextEmail('从' + from_station + '前往' + to_station + '的', info)
    else:
        print('没有车次信息')

if __name__ == '__main__':

    # Travel date
    date = "2020-12-15"
    # Departure station
    from_station = "秦皇岛"
    # Arrival station
    to_station = "郑州"
    # Run one query immediately, before the schedule takes over
    GET(date,from_station,to_station)
    # Run once every hour
    # schedule.every().hour.do(GET,date,from_station,to_station)
    # Run once every minute
    schedule.every().minute.do(GET,date,from_station,to_station)
    # Poll the scheduler every 2 seconds so that due jobs get executed
    while True:
        schedule.run_pending()
        time.sleep(2)
  • 3
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Liu_Shihao

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值