python爬取12306

config.json 文件（程序从 ./12306/config.json 读取）内容如下：

 

{
   "iphone":"1582222222",
    "username":"xxx",
    "password":"xxxx",
    "bookinfos":{ "fromstation":"NJH","tostation":"SHH","date": "2018-10-02","info":
                  [
                    {"trainid": "T135", "seattype":["hard_sleep"]}
                  ]},
    "seattype":{"shangwu":"9","first_class_seat":"M","second_class_seat":"O","soft_sleep":"4","hard_sleep":"3","hard_seat":"1"},
    "station":{"NJH":"南京","SHH":"上海","XAY":"西安"},

   "domain":"https://kyfw.12306.cn/",
   "url_sendsms":"https://kyfw.12306.cn/otn/leftTicket/queryA",
   "url_captchacheck":"https://kyfw.12306.cn/passport/captcha/captcha-check",
   "url_login":"https://kyfw.12306.cn/passport/web/login",
    "url_leftticketinit":"https://kyfw.12306.cn/otn/leftTicket/init",
   "url_querya":"https://kyfw.12306.cn/otn/leftTicket/queryA",
    "url_captchaimg":"https://kyfw.12306.cn/passport/captcha/captcha-image?login_site=E&module=login&rand=sjrand",
    "url_uamtk":"https://kyfw.12306.cn/passport/web/auth/uamtk",
    "url_uamauthclient":"https://kyfw.12306.cn/otn/uamauthclient",
    "url_userlogin":"https://kyfw.12306.cn/otn/login/userLogin",
    "url_initmy12306":"https://kyfw.12306.cn/otn/index/initMy12306",
    "url_checkuser":"https://kyfw.12306.cn/otn/login/checkUser",
    "url_submitorder":"https://kyfw.12306.cn/otn/leftTicket/submitOrderRequest",
    "url_initdc":"https://kyfw.12306.cn/otn/confirmPassenger/initDc",
    "url_getpassenger":"https://kyfw.12306.cn/otn/confirmPassenger/getPassengerDTOs",
    "url_checkorderinfo":"https://kyfw.12306.cn/otn/confirmPassenger/checkOrderInfo",
    "url_getqueuecount":"https://kyfw.12306.cn/otn/confirmPassenger/getQueueCount",
    "url_confirmsingleforqueue":"https://kyfw.12306.cn/otn/confirmPassenger/confirmSingleForQueue",
    "url_queryorderwaittime":"https://kyfw.12306.cn/otn/confirmPassenger/queryOrderWaitTime",
    "url_resultorderfordcqueue":"https://kyfw.12306.cn/otn/confirmPassenger/resultOrderForDcQueue",
    "url_payorderinit":"https://kyfw.12306.cn/otn//payOrder/init"
}
import requests
from bs4 import BeautifulSoup
import json
import datetime
import re
import sys
import logging
import os
import http.cookiejar as cookielib
import chardet
import numpy as np
import time
class Ticket12306(object):

    def __init__(self):
        """Create the HTTP session shared by every request of this client.

        The session's cookie store is replaced by an ``LWPCookieJar`` bound to
        ``./ticket12306cookie.txt``; nothing is loaded from or saved to that
        file here.
        """
        cookie_jar = cookielib.LWPCookieJar(filename="./ticket12306cookie.txt")
        http_session = requests.Session()
        http_session.cookies = cookie_jar
        self.sessionid = http_session

    def get_config(self):
        with open("./12306/config.json","rb") as config:
            self.config = json.load(config,encoding='utf-8')



    def querytrain(self):

        #初始化订票页面
        resp = self.sessionid.get(self.config['url_leftticketinit'])
        #print('初始化订票页面',resp,resp.text)
        querya = {
                    "leftTicketDTO.train_date": self.config['bookinfos']['date'],
                    "leftTicketDTO.from_station": self.config['bookinfos']['fromstation'],
                    "leftTicketDTO.to_station": self.config['bookinfos']['tostation'],
                    "purpose_codes": "ADULT"
                  }
        resp = self.sessionid.get(self.config['url_querya'],params=querya)
        traininfos = json.loads(resp.text)
        #print(chardet.detect(resp.text.),resp.text)
        print(chardet.detect(resp.content), resp.content)
        for traininfo in traininfos['data']['result']:
            traininfo.split("|")
            # 循环遍历每辆列车的信息
            data_list = traininfo.split("|")


            train = {
                'train_no':{'text':' 车次号码','value':data_list[3]},
                'from_station_name':{'text':' 出发站','value':data_list[6]},
                'to_station_name':{'text':' 终点站','value':data_list[7]},
                'start_time':{'text':' 出发时间','value':data_list[8]},
                'arrive_time':{'text':' 到达时间','value':data_list[9]},
                'time_fucked_up':{'text':' 总耗时','value':data_list[10]},
                'first_class_seat':{'text':' 一等座','value':data_list[31] or '--'},
                'second_class_seat':{'text':' 二等座','value':data_list[30] or '--'},
                'soft_sleep':{'text':' 软卧','value':data_list[23] or '--'},
                'hard_sleep':{'text':' 硬卧','value':data_list[28] or '--'},
                'hard_seat':{'text':' 硬座','value':data_list[29] or '--'},
                'no_seat':{'text':' 无座','value':data_list[26] or '--'},
                'secretStr':{'text':'secretStr','value':data_list[0].replace('%2B','+').replace('%2F','/').replace('%0A','\n').replace('%3D','=')}
            }
            yield  train
    def showtickets(self):
        for train in self.querytrain():
            print(train)

    def login(self):
        """Log in to 12306 interactively.

        Steps, each of which aborts the login (prints a message and returns
        ``None``) when it fails:

        1. download the captcha image, save it as ``./captch12306.jpg`` and
           open it with the shell command ``start``;
        2. ask the user for the matching picture numbers and verify them;
        3. post the username and password from ``self.config``;
        4. exchange tokens through the ``uamtk`` and ``uamauthclient`` URLs;
        5. request the ``userLogin`` and ``initMy12306`` pages.
        """
        # Fetch the captcha image.
        res = self.sessionid.get(self.config['url_captchaimg'])
        if res.status_code != 200:
            print('获取验证码图片失败')
            return

        # Save the captcha image; ``with`` closes the file even if the write fails.
        with open("./captch12306.jpg", "wb") as f:
            f.write(res.content)

        # Show the image to the user (``start`` is a Windows shell command).
        os.system('start captch12306.jpg')
        captchstr = input("请输入验证码顺序号,用逗号隔开:")

        # Convert picture numbers to click coordinates through this instance,
        # not a module-level object, so login() also works when the class is
        # imported. str((x, y))[1:-1] renders one coordinate pair as "x, y".
        captchxystr = ",".join(
            str(self.getcaptchxy(int(captch)))[1:-1]
            for captch in captchstr.split(',')
        )
        print(captchxystr)

        # Verify the captcha; result_code "4" means success.
        captchcheckdata = {"answer": captchxystr, "rand": "sjrand", "login_site": "E"}
        resp = self.sessionid.post(self.config['url_captchacheck'], data=captchcheckdata)
        if resp.status_code != 200:
            print('验证验证码失败')
            return

        captchcheckrespdict = json.loads(resp.text)
        if captchcheckrespdict['result_code'] != "4":
            print('验证验证码失败', resp.text)
            return
        print(resp.text)

        # Log in; result_code 0 means success.
        loginpostdata = {
            "username": self.config['username'],
            "password": self.config['password'],
            "appid": "otn",
        }
        resp = self.sessionid.post(self.config['url_login'], data=loginpostdata)
        if resp.status_code != 200:
            print('login 失败', resp.status_code)
            return

        loginrespdict = json.loads(resp.text)
        if loginrespdict['result_code'] != 0:
            print('login 失败', resp.text)
            return
        print(resp.text)

        # uamtk; result_code 0 means success.
        uamtkdata = {"appid": "otn"}
        resp = self.sessionid.get(self.config['url_uamtk'], params=uamtkdata)
        if resp.status_code != 200:
            print('uamtk 失败', resp.status_code)
            return
        uamtkrespdict = json.loads(resp.text)
        if uamtkrespdict['result_code'] != 0:
            print('uamtk 失败', resp.text)
            return
        print(resp.text)

        # uamauthclient; result_code 0 means success. It receives the token
        # issued by the uamtk step.
        uamauthclientdata = {"tk": uamtkrespdict['newapptk']}
        print(uamauthclientdata)
        resp = self.sessionid.post(self.config['url_uamauthclient'], data=uamauthclientdata)
        if resp.status_code != 200:
            print('uamtauth 失败', resp.status_code)
            return
        uamtauthrespdict = json.loads(resp.text)
        if uamtauthrespdict['result_code'] != 0:
            print('uamtauth 失败', resp.text)
            return
        print(resp.text)

        # userlogin
        resp = self.sessionid.get(self.config['url_userlogin'])
        if resp.status_code != 200:
            print('userlogin 失败', resp.status_code)
            return

        # initmy12306
        resp = self.sessionid.get(self.config['url_initmy12306'])
        if resp.status_code != 200:
            print('initmy12306 失败', resp.status_code)
            return
        print('登录成功')


    def choicetrainno(self,bookinfos):

            #self.login()

        #查询车次信息
        choiceticketlist = []
        for booktrain in self.config['bookinfos']['info']:
            traininfo = self.gettraininfo(booktrain['trainid'])
            #检查是否有票
            print(traininfo)
            for seattype in booktrain['seattype']:
                if traininfo[seattype]['value'] != '无' and traininfo[seattype]['value'] != '--' :
                    choiceticketlist.append({'traininfo':traininfo,'booktrain':booktrain,'seattype':seattype})
        return choiceticketlist

    def bookticket(self, traininfo,booktrain,seattype):

        resp = self.sessionid.post(self.config['url_checkuser'], data={"_json_att": ""})

        if resp.status_code != 200:
            print('检查用户是否登录失败', resp.status_code)
            return
        checkuser = json.loads(resp.text)
        print(type(checkuser['status']), checkuser['status'])
        if checkuser['status'] != True:
            print("用户未登录,请重新登录")
            return
        fromstationcode = self.config['bookinfos']['fromstation']
        tostationcode = self.config['bookinfos']['tostation']

        #预定车票
        bookticketdata = {
            'secretStr':traininfo['secretStr']['value'],
            'train_date':self.config['bookinfos']['date'],
            'back_train_date':datetime.datetime.now().__format__('%Y-%m-%d'),
            'tour_flag': 'dc',
            'purpose_codes': 'ADULT',
            'query_from_station_name': self.config['station'][fromstationcode],
            'query_to_station_name': self.config['station'][tostationcode]
        }
        print(bookticketdata)

        resp = self.sessionid.post(self.config['url_submitorder'],data=bookticketdata)
        if resp.status_code != 200:
            print('submitorder fail', resp.status_code)
            return

        # 获取train_no、leftTicketStr、fromStationTelecode、toStationTelecode、train_location
        resp = self.sessionid.post(self.config['url_initdc'], data={"_json_att": ""})
        if resp.status_code != 200:
            print('initDc fail', resp.status_code)
            return

        train_no_pat = "'train_no':'(.*?)'"
        leftTicketStr_pat = "'leftTicketStr':'(.*?)'"
        fromStationTelecode_pat = "from_station_telecode':'(.*?)'"
        toStationTelecode_pat = "'to_station_telecode':'(.*?)'"
        train_location_pat = "'train_location':'(.*?)'"
        pattoken = "var globalRepeatSubmitToken.*?'(.*?)'"
        patkey = "'key_check_isChange':'(.*?)'"
        #print('获取token',resp.text)
        train_no_all = re.compile(train_no_pat).findall(resp.text)
        if (len(train_no_all) != 0):
            self.train_no = train_no_all[0]
        else:
            raise Exception("train_no获取失败")
        leftTicketStr_all = re.compile(leftTicketStr_pat).findall(resp.text)
        if (len(leftTicketStr_all) != 0):
            self.leftTicketStr = leftTicketStr_all[0]
        else:
            raise Exception("leftTicketStr获取失败")
        fromStationTelecode_all = re.compile(fromStationTelecode_pat).findall(resp.text)
        if (len(fromStationTelecode_all) != 0):
            self.fromStationTelecode = fromStationTelecode_all[0]
        else:
            raise Exception("fromStationTelecod获取失败")
        toStationTelecode_all = re.compile(toStationTelecode_pat).findall(resp.text)
        if (len(toStationTelecode_all) != 0):
            self.toStationTelecode = toStationTelecode_all[0]
        else:
            raise Exception("toStationTelecode获取失败")
        train_location_all = re.compile(train_location_pat).findall(resp.text)
        if (len(train_location_all) != 0):
            self.train_location = train_location_all[0]
        else:
            raise Exception("train_location获取失败")
        tokenall = re.compile(pattoken).findall(resp.text)
        if (len(tokenall) != 0):
            self.token = tokenall[0]
        else:
            raise Exception("Token获取失败")
        keyall = re.compile(patkey).findall(resp.text)
        if (len(keyall) != 0):
            self.key = keyall[0]
        else:
            raise Exception("key_check_isChange获取失败")
        # 还需要获取train_location
        pattrain_location = "'tour_flag':'dc','train_location':'(.*?)'"
        train_locationall = re.compile(pattrain_location).findall(resp.text)
        if (len(train_locationall) != 0):
            self.train_location = train_locationall[0]
        else:
            raise Exception("train_location获取失败")

        #获取乘客信息
        self.getpassenger(self.token)

        self.checkorderinfo(self.config['seattype'][seattype])
        self.getqueuecount(booktrain['trainid'],self.config['seattype'][seattype])
        self.confirmsingleforqueue(self.config['seattype'][seattype])
        self.queryorderwaittime()
        self.resultorderfordcqueue()
        self.payorderinit()


    def getpassenger(self,token):
        getuserdata ={"REPEAT_SUBMIT_TOKEN": token }
        resp = self.sessionid.post(self.config['url_getpassenger'],data=getuserdata)
        if resp.status_code != 200:
            print('getpassenger fail', resp.status_code)
            return
        # 获取用户信息
        # 提取姓名
        namepat = '"passenger_name":"(.*?)"'
        # 提取身份证
        idpat = '"passenger_id_no":"(.*?)"'
        # 提取手机号
        mobilepat = '"mobile_no":"(.*?)"'
        # 提取对应乘客所在的国家
        countrypat = '"country_code":"(.*?)"'
        nameall = re.compile(namepat).findall(resp.text)
        idall = re.compile(idpat).findall(resp.text)
        mobileall = re.compile(mobilepat).findall(resp.text)
        countryall = re.compile(countrypat).findall(resp.text)
        #print(nameall,idall,mobileall,countryall)
        schema = ['name', 'id', 'iphone', 'nation']

        pps = np.array([nameall,idall,mobileall,countryall])
        passenger = []
        for p in pps.transpose():
            passenger.append(dict(zip(schema, p)))
        self.passenger = passenger
        return self.passenger
    def checkorderinfo(self,seattype):
        checkorderdata = {"cancel_flag": 2,
                     "bed_level_order_num": "000000000000000000000000000000",
                    "passengerTicketStr": seattype+",0,1," + self.passenger[0]['name'] + ",1," + self.passenger[0]['id'] + ",\
                                        " + self.passenger[0]['iphone'] + ",N",
                     "oldPassengerStr": self.passenger[0]['name']  + ",1," + self.passenger[0]['id']  + ",1_",
                     "tour_flag": "dc",
                     "randCode": "",
                     "whatsSelect": 1,
                     "_json_att": "",
                     "REPEAT_SUBMIT_TOKEN": self.token}
        resp = self.requestpost(self.config['url_checkorderinfo'],checkorderdata,200)
        print(__name__,sys._getframe().f_lineno,resp.text)

    #trainid 车次
    def getqueuecount(self,trainid,seattype):

        # 先将字符串转为常规时间格式
        thisdatestr = self.config['bookinfos']['date']  # 需要的买票时间
        thisdate = datetime.datetime.strptime(thisdatestr, "%Y-%m-%d").date()
        # 再转为对应的格林时间
        gmt = "%a %b %d %Y 00:00:00 "
        thisgmtdate = thisdate.strftime(gmt)

        leftstr2 = self.leftTicketStr#.replace("%", "%25")
        queuecountdata = {"train_date": str(thisgmtdate)+"GMT+0800 (中国标准时间)", "train_no": self.train_no,
            "stationTrainCode": trainid, "seatType": seattype, "fromStationTelecode": self.fromStationTelecode,
            "toStationTelecode": self.toStationTelecode, "leftTicket": leftstr2, "purpose_codes": "00",
            "train_location": self.train_location, "_json_att": "", "REPEAT_SUBMIT_TOKEN": str(self.token)
        }
        print(__name__, sys._getframe().f_lineno, queuecountdata)
        resp = self.requestpost(self.config['url_getqueuecount'], queuecountdata, 200)
        print(__name__, sys._getframe().f_lineno, resp.text)
    #座位类型
    def confirmsingleforqueue(self,seattype):
        time.sleep(1)
        confirmsingleforqueue = {
            "passengerTicketStr": seattype+",0,1," + self.passenger[0]['name'] + ",1," + self.passenger[0]['id'].strip() + "," + self.passenger[0]['iphone'].strip() + ",N_",
            "oldPassengerStr": self.passenger[0]['name'] + ",1," + self.passenger[0]['id'].strip() + ",1_",
                "randCode": "",
            "purpose_codes": "00",
            "key_check_isChange": self.key,
            "leftTicketStr": self.leftTicketStr,
            "train_location": self.train_location, "choose_seats": "", "seatDetailType": "000", "whatsSelect": "1",
            "roomType": "00", "dwAll": "N", "_json_att": "", "REPEAT_SUBMIT_TOKEN": self.token
        }
        print(__name__, sys._getframe().f_lineno, confirmsingleforqueue)
        resp = self.requestpost(self.config['url_confirmsingleforqueue'], confirmsingleforqueue, 200)
        print(__name__, sys._getframe().f_lineno, resp.text)

    def queryorderwaittime(self):
        time1 = time.time()
        while True:
            # 总请求4-确认步骤2-获取orderid
            time2 = time.time()
            if ((time2 - time1) // 60 > 5):
                raise Exception("获取orderid超时,正在进行新一次抢购")
                break

            queryorderwaittime = {"random": str(int(time.time() * 1000)),
                                  "tourFlag": "dc", "_json_att": "",
                                "REPEAT_SUBMIT_TOKEN": str(self.token)
                                }
            print(__name__, sys._getframe().f_lineno, queryorderwaittime)
            resp = self.requestget(self.config['url_queryorderwaittime'], queryorderwaittime, 200)
            print(__name__, sys._getframe().f_lineno, resp.text)

            patorderid = '"orderId":"(.*?)"'
            orderidall = re.compile(patorderid).findall(resp.text)
            if (len(orderidall) == 0):
                print("未获取到orderid,正在进行新一次的请求。")
                continue
            else:
                self.orderid = orderidall[0]
                break

    def resultorderfordcqueue(self):
        resultorderfordcqueue = {"orderSequence_no":self.orderid,
                                 "_json_att":"",
                                 "REPEAT_SUBMIT_TOKEN": str(self.token)
                                }
        print(__name__, sys._getframe().f_lineno, resultorderfordcqueue)
        resp = self.requestget(self.config['url_resultorderfordcqueue'], resultorderfordcqueue, 200)
        print(__name__, sys._getframe().f_lineno, resp.text)

    def payorderinit(self):
        payorderinit = {"_json_att":"",
                        "REPEAT_SUBMIT_TOKEN":str(self.token)
                        }
        resp = self.requestpost(self.config['url_payorderinit'], payorderinit, 200)
        print(__name__, sys._getframe().f_lineno, resp.text)

    def getcaptchxy(self,pic):
            xy=()
            if (pic == 1):
                xy = (35, 45)
            if (pic == 2):
                xy = (112, 45)
            if (pic == 3):
                xy = (173, 45)
            if (pic == 4):
                xy = (253, 45)
            if (pic == 5):
                xy = (35, 114)
            if (pic == 6):
                xy = (112, 114)
            if (pic == 7):
                xy = (173, 114)
            if (pic == 8):
                xy = (253, 114)
            return xy
    def gettraininfo(self,train_no):
        for train in self.querytrain():
            if train['train_no']['value'] == train_no:
                return train

    def requestpost(self,url,data,status_code):

        resp = self.sessionid.post(url,data=data)
        if resp.status_code != status_code:
            raise Exception(self.parseurl['path'] + ' fail', resp.status_code)
        return resp
    def requestget(self,url,params,status_code):
        resp = self.sessionid.get(url,params=params)
        if resp.status_code != status_code:
            raise Exception(self.parseurl['path']+' fail',resp.status_code)

        return resp

    def parseurl(self,url):
        regexp = (r'^(?P<scheme>[a-z][\w\.\-\+]+)?:(//)?'
                  r'(?:(?P<username>\w+):(?P<password>[\w\W]+)@|)'
                  r'(?P<domain>[\w-]+(?:\.[\w-]+)*)(?::(?P<port>\d+))?/?'
                  r'(?P<path>\/[\w\.\/-]+)?(?P<query>\?[\w\.*!=&@%;:/+-]+)?'
                  r'(?P<fragment>#[\w-]+)?$')
        match = re.search(regexp, url.strip(), re.U)
        if match is None:
            raise ValueError('Incorrent url: {0}'.format(url))
        url_parts = match.groupdict()
        return url_parts
    def bookjob(self):
        choicetrainlist = self.choicetrainno(self.config['bookinfos'])
        for dict in choicetrainlist:
            self.bookticket(dict['traininfo'],dict['booktrain'],dict['seattype'])




# Script entry point: load the configuration, log in interactively, then try
# to book the configured tickets.
if __name__ == '__main__':
    tk = Ticket12306()
    tk.get_config()

    tk.login()
    tk.bookjob()


    #tk.showtickets()
    #tk.bookticket(1)

 

 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值