Python大神给免费分享一个爬取12306火车票例子(附源码)

31 篇文章 0 订阅
31 篇文章 0 订阅

快过年了,Python大神给免费分享一个爬取12306火车票例子(附源码)

。。。


上面是以前写的文章的资源,在以前的文章中有对应的资源,有兴趣的可以去查看。


作为一种便捷地收集网上信息并从中抽取出可用信息的方式,网络爬虫技术变得越来越有用。使用Python这样的简单编程语言,你只需少量编程技能就可以爬取复杂的网站。


如果手机上显示代码错乱,请分享到QQ或者其他地方,用电脑查看!!!


python能干的东西有很多,这里不再过多叙述,直接重点干货。

快过年了,Python大神给免费分享一个爬取12306火车票例子(附源码)



在这里还是要推荐下我自己建的Python开发学习群:483546416,群里都是学Python开发的,如果你正在学习Python,小编欢迎你加入,大家都是软件开发党,不定期分享干货(只有Python软件开发相关的),包括我自己整理的一份2018最新的Python进阶资料和高级开发教程,欢迎进阶中和想深入Python的小伙伴加入。

。。。

重要提醒:运行前需要确认安装环境是Python2.7还是Python3.x;下面的代码使用了http.client、print()和input(),需要在Python3.x下运行。

缺少的包,采用命令行:pip install 包名

进行安装,不然运行不了。

快过年了,Python大神给免费分享一个爬取12306火车票例子(附源码)

...


主要源码:

用户信息配置:

配置信息很重要,一定不要弄错了,不然不能成功!各种帐号密码,电话,姓名,身份证。

###################################CONFIG START##################################
# Account credentials.
user = '帐号'
passwd = '密码'

# Train numbers to buy. When empty the script enters interactive mode and the
# train number has to be typed in by hand, so setting this is recommended.
g_buy_list = [
    "K827",
    "K587",
    "K841",
    "K1224",
    "K836",
]

# Train numbers to ignore.
g_ingnore_list = ["L74"]

# Query conditions: travel date and stations. Station codes can be looked up
# in station_code.txt under the info directory.
g_query_data = [
    ("leftTicketDTO.train_date", "2017-12-27"),
    ("leftTicketDTO.from_station", "GZQ"),
    ("leftTicketDTO.to_station", "LZZ"),
    ("purpose_codes", "ADULT"),
]

# Passenger details.
g_passengers = [
    {
        "name": "姓名",
        "id": "身份证",
        "tel": "电话",
    },
]

# Seat-type keys (reference table for g_care_seat_types below):
#   gr_num  - premium soft sleeper (高级软卧)
#   rw_num  - soft sleeper (软卧)
#   rz_num  - soft seat (软座)
#   tz_num  - premier seat (特等座)
#   wz_num  - standing (无座)
#   yw_num  - hard sleeper (硬卧)
#   yz_num  - hard seat (硬座)
#   ze_num  - second class (二等座)
#   zy_num  - first class (一等座)
#   swz_num - business class (商务座)

# Seat types of interest; the keys are listed in the table above.
g_care_seat_types = [
    "rw_num",
    "yw_num",
]

# Number of automatic captcha recognition attempts (tesseract-based OCR).
# Recognition works well when the captcha has no overlap and no background.
# A server with background noise was only seen once, for a few hours, after a
# DNS update; one could pin a clean server in the hosts file, or do more image
# processing to remove noise before OCR.
g_max_auto_times = 0

# Seconds to wait between query refreshes.
g_query_sleep_time = 1
###################################End##################################


抢票代码:

千万注意要导入的包,注意注意,不然运行可能会报错!

import ast
import cProfile
import datetime
import functools
import gzip
import http.client
import json
import logging
import os
import subprocess
import sys
import time
import traceback
import urllib
import urllib.parse
from io import StringIO
# import http.httplib

# Load your own configuration module here.
#from conf_frankie_test import *
#from conf_neil import *
#from conf_example import *
from shuapiao12306.conf_example import (
    g_passengers, g_max_auto_times, passwd, g_buy_list, g_ingnore_list,
    g_care_seat_types, g_query_data, g_query_sleep_time, user,
)
# Whether to clean up temporary files such as captcha images (flag set below).
g_clean_temp = False  # when True, captcha images and OCR output files are deleted after use
##########################internal###############################
# Train-number prefix -> Chinese display name of the train category.
g_str_train_types = {
    "G": "高铁",
    "L": "临客",
    "D": "动车",
    "Z": "直达",
    "T": "特快",
    "K": "快速",
}

# Seat-type key (as used in the query result) -> seat code sent when ordering.
g_seat_code_dict = {
    "yz_num": "1",
    "rz_num": "2",
    "yw_num": "3",
    "rw_num": "4",
    "gr_num": "6",
    "tz_num": "P",
    "wz_num": "WZ",
    "ze_num": "O",
    "zy_num": "M",
    "swz_num": "9",
}

logger = logging.getLogger('shuapiao')

# Single shared connection used by every request in this module.
g_conn = http.client.HTTPConnection(
    'kyfw.12306.cn',
    timeout=100,
)
def restart_conn(conn):
    """Close *conn* and re-open it in place.

    The original created a brand-new ``HTTPConnection`` and bound it to the
    local name only, so the caller's connection stayed closed and the new
    socket was thrown away. Re-connecting the same object keeps every holder
    of the connection (including the module-level one) working.

    Args:
        conn: an object with ``close()`` and ``connect()`` methods, normally
            an ``http.client.HTTPConnection``.
    """
    print("restart connection")
    conn.close()
    conn.connect()
def retries(max_tries):
    """Decorator factory: call the wrapped function up to *max_tries* times.

    * ``http.client.HTTPException``: the connection is restarted and the call
      is attempted again; if every attempt fails this way the wrapper returns
      ``None`` (callers treat that as a falsy "failed" result).
    * Any other exception: logged and retried while attempts remain, and
      re-raised on the final attempt.

    The original iterated ``range(max_tries)`` upwards with the ``reverse()``
    call commented out, so the "attempts remaining" counter started at 0 and
    the very first failure was re-raised without any retry.

    Args:
        max_tries: maximum number of calls to make.

    Returns:
        A decorator. It accepts an optional ``conn`` to restart on connection
        errors; when omitted, the module-level ``g_conn`` is looked up at
        failure time.
    """
    def dec(func, conn=None):
        @functools.wraps(func)
        def f2(*args, **kwargs):
            # Count down so that 0 means "this was the last attempt".
            for tries_remaining in reversed(range(max_tries)):
                try:
                    return func(*args, **kwargs)
                except http.client.HTTPException:
                    print("conneciont error")
                    restart_conn(g_conn if conn is None else conn)
                except Exception:
                    if tries_remaining <= 0:
                        raise
                    traceback.print_exc()
                    logger.error("errror %d" % tries_remaining)
                    logger.error(traceback.format_exc())
            # Only reached when every attempt ended in a connection error.
            return None
        return f2
    return dec
def call_tesseract(in_file):
    """Run the ``tesseract`` command on *in_file* and return the recognised text.

    The command writes its result to ``o.txt`` in the current directory; that
    file is read back and, when ``g_clean_temp`` is set, removed afterwards.

    Args:
        in_file: path of the captcha image to recognise.

    Returns:
        The recognised text with all whitespace removed, or ``''`` when the
        command cannot be started, exits with a non-zero status, or the text
        is not exactly 4 characters long. Callers treat ``''`` as "fall back
        to manual input".
    """
    tesseract_exe_name = 'tesseract'
    expect_len = 4
    out_file = "o"
    out_full = out_file + '.txt'
    try:
        ret = subprocess.call([tesseract_exe_name, in_file, out_file])
    except OSError as e:
        # Executable missing or not runnable: report and fall back instead of crashing.
        print("call tesseract failed:%s" % e)
        return ''
    if ret != 0:
        print("call tesseract failed:%d" % ret)
        return ''
    with open(out_full) as f:
        text = f.read()
    if g_clean_temp:
        os.remove(out_full)
    # Drop every whitespace character (spaces, newlines, form feeds, ...),
    # not just trailing "\r\n" and embedded spaces.
    text = "".join(text.split())
    print("auto read rand_code:%s" % text)
    if len(text) != expect_len:
        print("auto read faild:%s, %d" % (text, len(text)))
        return ''
    return text
class HttpAuto:
    """Automates the 12306 web flow: captcha check, login, query and ordering.

    Every request is sent over the module-level ``g_conn`` connection. Session
    cookies, the repeat-submit token and the captcha answers are stored on the
    instance between steps. Network-facing methods are wrapped with
    ``@retries`` so transient failures are retried.
    """

    def __init__(self):
        # Headers used for the login-stage captcha requests.
        self.ext_header = {
            "Accept": "*/*",
            "X-Requested-With": "XMLHttpRequest",
            "Referer": "http://kyfw.12306.cn/otn/login/init#",
            "Accept-Language": "zh-cn",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "Keep-Alive",
            "Cache-Control": "no-cache",
            "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        }
        # Headers used for every other request.
        self.proxy_ext_header = {
            "Accept": "*/*",
            "X-Requested-With": "XMLHttpRequest",
            "Referer": "http://kyfw.12306.cn/otn/login/init#",
            "Accept-Language": "zh-cn",
            "Accept-Encoding": "gzip, deflate",
            "Proxy-Connection": "Keep-Alive",
            "Pragma": "no-cache",
            "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        }
        # Cookies captured from the server.
        self.sid = ''
        self.sip = ''
        # Passenger info to be POSTed.
        self.passengerTicketStr = ''
        self.oldPassengerStr = ''
        # Values scraped from the order page and echoed back in later POSTs.
        self.globalRepeatSubmitToken = ''
        self.key_check_isChange = ''
        self.leftTicketStr = ''
        self.orderId = ''
        # Captcha answers (login captcha / order captcha).
        self.pass_code = 'abcd'
        self.rand_code = 'abcd'

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _read_body(res):
        """Read the whole response body, undoing gzip content encoding."""
        data = res.read()
        if res.getheader('Content-Encoding') == 'gzip':
            data = gzip.decompress(data)
        return data

    @staticmethod
    def _payload(res_json):
        """Return the ``data`` member of a decoded reply, or ``{}`` if it is not a dict."""
        data = res_json.get('data')
        return data if isinstance(data, dict) else {}

    def _post_json(self, url, fields, headers=None):
        """POST *fields* form-encoded to *url* and return the decoded JSON reply."""
        post_data = urllib.parse.urlencode(fields)
        if headers is None:
            headers = self.proxy_ext_header
        g_conn.request('POST', url, body=post_data, headers=headers)
        return json.loads(self._read_body(g_conn.getresponse()))

    @staticmethod
    def _parse_init_dc_page(page):
        """Extract the repeat-submit token and the passenger form from the initDc page.

        Args:
            page: decoded text of the page.

        Returns:
            ``(token, form_dict)``, or ``None`` when either the
            ``var globalRepeatSubmitToken`` or the
            ``var ticketInfoForPassengerForm`` line is missing.
        """
        token_line = ''
        form_line = ''
        for line in page.splitlines():
            stripped = line.strip()
            if stripped.startswith('var globalRepeatSubmitToken'):
                token_line = stripped
            elif stripped.startswith('var ticketInfoForPassengerForm'):
                form_line = stripped
                break
        if '=' not in token_line or '=' not in form_line:
            return None
        # "var x = 'TOKEN';" -> TOKEN (drop the opening quote and the closing "';").
        token = token_line.split('=', 1)[1].strip()[1:-2]
        # Split on the first '=' only so '=' inside the object literal survives;
        # [:-1] drops the trailing ';'.
        req_data = form_line.split('=', 1)[1].strip()[:-1]
        # Turn the JavaScript literal into a Python one.
        req_data = req_data.replace("null", "''")
        req_data = req_data.replace("true", "True")
        req_data = req_data.replace("false", "False")
        # SECURITY: this text comes from the server. The original ran eval()
        # on it; literal_eval only accepts plain literals.
        return token, ast.literal_eval(req_data)

    # ------------------------------------------------------------------
    # session handling
    # ------------------------------------------------------------------
    def construct_passengerTicketStr(self):
        """Build the two passenger strings sent with the order from ``g_passengers``."""
        print("###construct_passengerTicketStr###")
        new_parts = [
            '1,0,1,' + p['name'] + ',1,' + p['id'] + ',' + p['tel'] + ',N'
            for p in g_passengers
        ]
        old_parts = [p['name'] + ',1,' + p['id'] + ',1_' for p in g_passengers]
        # Kept as text: urlencode() encodes it as UTF-8 when the form is built.
        self.passengerTicketStr = '_'.join(new_parts)
        self.oldPassengerStr = ''.join(old_parts)
        print("new:%s" % self.passengerTicketStr)
        print("old:%s" % self.oldPassengerStr)

    def logout(self):
        """Send the logout request. Always returns True."""
        url_logout = "http://kyfw.12306.cn/otn/login/loginOut"
        # The original passed '540' as the HTTP method, which is not a valid verb.
        g_conn.request('GET', url_logout, headers=self.proxy_ext_header)
        # Drain the reply so the connection is left in a clean state.
        g_conn.getresponse().read()
        return True

    def __del__(self):
        # Best effort only: a destructor must not raise (the connection may
        # already be gone, or the interpreter may be shutting down).
        try:
            try:
                self.logout()
            finally:
                print("close connnection")
                g_conn.close()
        except Exception:
            pass

    def update_session_info(self, res):
        """Pick the JSESSIONID and BIGipServerotn cookies out of *res*.

        Handles both one cookie per ``Set-Cookie`` header and several cookies
        joined with commas in a single header; header names are matched
        case-insensitively.

        Returns:
            True if at least one of the two cookies was updated.
        """
        print("process header cookie")
        update = False
        for name, value in res.getheaders():
            if name.lower() != "set-cookie":
                continue
            for part in value.split(','):
                cookie = part.split(';')[0].strip()
                if cookie.startswith('JSESSIONID'):
                    self.sid = cookie
                    update = True
                    print("Update sessionid "+self.sid)
                elif cookie.startswith('BIGipServerotn'):
                    self.sip = cookie
                    update = True
                    print("Update sip:"+self.sip)
        return update

    # ------------------------------------------------------------------
    # captcha
    # ------------------------------------------------------------------
    def check_pass_code_common(self, module, rand_method):
        """Fetch a captcha, obtain its answer and verify it with the server.

        The answer comes from OCR for at most ``g_max_auto_times`` attempts,
        then from the keyboard. Typing ``no`` fetches a new captcha and
        ``quit`` gives up.

        Args:
            module: ``'login'`` or ``'passenger'``.
            rand_method: value of the ``rand`` request parameter.

        Returns:
            True once the server accepts an answer, False if the user quits.
        """
        ret = False
        auto_times = g_max_auto_times
        header = self.ext_header if module == 'login' else self.proxy_ext_header
        url_pass_code = "http://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=%s&rand=%s" % (module, rand_method)
        url_check_rand = "http://kyfw.12306.cn/otn/passcodeNew/checkRandCodeAnsyn"
        while 1:
            print("send getPassCodeNew:%s" % datetime.datetime.now())
            g_conn.request('GET', url_pass_code, headers=header)
            res = g_conn.getresponse()
            print("recv getPassCodeNew=====>:%s" % datetime.datetime.now())
            if module == 'login':
                self.update_session_info(res)
                self.ext_header["Cookie"] = self.sid+';'+self.sip
            # Save the image; fall back to jpeg when no content type is sent.
            content_type = res.getheader('Content-Type') or 'image/jpeg'
            pic_type = content_type.split(';')[0].split('/')[-1]
            data = res.read()
            file_name = "./pass_code.%s" % pic_type
            with open(file_name, 'wb') as f:
                f.write(data)
            # Automatic recognition first, manual input as fallback. The
            # original tested g_max_auto_times here, so the OCR budget was
            # never actually consumed.
            read_pass_code = ''
            if auto_times > 0:
                auto_times = auto_times - 1
                read_pass_code = call_tesseract(file_name)
            if read_pass_code == '':
                read_pass_code = input("input passcode(%s):" % file_name)
                if read_pass_code == "no":
                    print("Get A new PassCode")
                    continue
                elif read_pass_code == "quit":
                    print("Quit")
                    break
                print("input:%s" % read_pass_code)
            else:
                print("auto:%s" % read_pass_code)
            if g_clean_temp:
                os.remove(file_name)
            data = []
            if module == 'passenger':
                self.proxy_ext_header["Referer"] = "http://kyfw.12306.cn/otn/confirmPassenger/initDc#nogo"
                self.rand_code = read_pass_code
                data = [
                    ("_json_att", ''),
                    ("rand", rand_method),
                    ("randCode", read_pass_code),
                    ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
                ]
            elif module == 'login':
                self.pass_code = read_pass_code
                data = [
                    ("randCode", read_pass_code),
                    ("rand", rand_method),
                ]
            print("send checkRandCodeAnsyn=====>:")
            resp = self._post_json(url_check_rand, data, headers=header)
            print("recv checkRandCodeAnsyn")
            if resp['data'] != 'Y':
                print("status error:%s" % resp['data'])
                continue
            ret = True
            break
        return ret

    @retries(3)
    def check_pass_code(self):
        """Step 1: solve the login captcha."""
        print("#############################Step1:Passcode#########")
        return self.check_pass_code_common('login', 'sjrand')

    @retries(3)
    def check_rand_code(self):
        """Step 8: solve the order-submission captcha."""
        print("#############################Step8:Randcode#########")
        return self.check_pass_code_common('passenger', 'randp')

    # ------------------------------------------------------------------
    # login / query
    # ------------------------------------------------------------------
    @retries(3)
    def loginAysnSuggest(self):
        """Step 2: log in with ``user``/``passwd`` after the captcha passes.

        Returns:
            True on successful login, False otherwise.
        """
        if not self.check_pass_code():
            return False
        print("#############################Step2:Login#########")
        url_login = "http://kyfw.12306.cn/otn/login/loginAysnSuggest"
        data = [
            ("loginUserDTO.user_name", user),
            ("userDTO.password", passwd),
            ("randCode", self.pass_code),
        ]
        self.proxy_ext_header["Cookie"] = self.sid+';'+self.sip
        print("send loginAysnSuggest=====>")
        res_json = self._post_json(url_login, data)
        print("recv loginAysnSuggest")
        payload = self._payload(res_json)
        if res_json.get('status') != True or 'loginCheck' not in payload:
            print(u"return error:%s" % ' '.join(res_json.get('messages') or []))
            return False
        if payload['loginCheck'] == 'Y':
            print(u"login success")
            return True
        print(u"login error %s" % payload['loginCheck'])
        return False

    def show_ticket(self, it):
        """Print one train's schedule and seat availability on a single line."""
        print(it['station_train_code'], it['from_station_name'], it['to_station_name'],
              it['start_time'], it['arrive_time'], it['lishi'],
              it['swz_num'], it['tz_num'], it['zy_num'], it['ze_num'], it['gr_num'],
              it['rw_num'], it['yw_num'], it['rz_num'], it['wz_num'], it['canWebBuy'])

    def do_ticket(self, json_data, result, want_special):
        """Filter the query result and buy a ticket (automatically or interactively).

        Args:
            json_data: decoded query reply.
            result: dict filled with ``train_code -> item`` for trains that
                have one of the seat types in ``g_care_seat_types``.
            want_special: when True only trains in ``g_buy_list`` are
                considered and they are bought in list order without asking.

        Returns:
            -2 to retry the query, -1 on a purchase error, 0 on success or
            when the user quits.
        """
        for item in json_data['data']:
            info = item['queryLeftNewDTO']
            if info['canWebBuy'] == 'N':
                continue
            train_code = info['station_train_code']
            if want_special and train_code not in g_buy_list:
                continue
            if train_code in g_ingnore_list:
                continue
            if any(info[t] != "--" and info[t] != u"无" for t in g_care_seat_types):
                result[train_code] = item
        # Nothing available: ask the caller to query again.
        if not result:
            return -2
        # Buy in the priority order given by g_buy_list.
        if want_special:
            for train_code in g_buy_list:
                if train_code not in result:
                    continue
                if not self.buy(result[train_code]):
                    print("Err during buy")
                    return -1
                return 0
        # Interactive mode: show everything and ask what to do.
        for train_code, item in result.items():
            self.show_ticket(item['queryLeftNewDTO'])
        cmd = input("input cmd[r|q|K101]:").strip()
        print("input:%s" % cmd)
        if cmd == "r":
            print("retry")
            return -2
        if cmd == "q":
            print("quit")
            return 0
        if cmd not in result:
            # A mistyped train number used to raise KeyError; query again instead.
            print("unknown train code:%s" % cmd)
            return -2
        print("buy ticket:%s" % cmd)
        if not self.buy(result[cmd]):
            print("Err during buy")
            return -1
        return 0

    @retries(3)
    def query(self):
        """Step 3: poll the left-ticket query until ``do_ticket`` reports completion.

        Sleeps ``g_query_sleep_time`` seconds before every re-query so a
        failing server is not hammered in a tight loop.

        Returns:
            True once ``do_ticket`` returns 0.
        """
        print("#############################Step3:Query#########")
        self.proxy_ext_header["Referer"] = "http://kyfw.12306.cn/otn/leftTicket/init"
        url_query = "http://kyfw.12306.cn/otn/leftTicket/query?" + urllib.parse.urlencode(g_query_data)
        print("start query======>%s" % url_query)
        want_special = len(g_buy_list) != 0
        if want_special:
            print("JUST For:%s" % (','.join(g_buy_list)))
        else:
            print(u"车次 出发->到达 时间:到达 历时 商务座 特等座 一等座 二等座 高级软卧 软卧 硬卧 软座 硬座 无座 其他备注")
        q_cnt = 0
        while 1:
            q_cnt = q_cnt + 1
            g_conn.request('GET', url_query, headers=self.proxy_ext_header)
            data = self._read_body(g_conn.getresponse())
            res_json = json.loads(data)
            if res_json.get('status') != True:
                print("parse json failed! data %s" % data)
                time.sleep(g_query_sleep_time)
                continue
            result = {}
            ret = self.do_ticket(res_json, result, want_special)
            if ret == 0:
                break
            if ret == -2:
                print(u"no ticket, refresh %d times!" % q_cnt)
            time.sleep(g_query_sleep_time)
        return True

    # ------------------------------------------------------------------
    # order steps
    # ------------------------------------------------------------------
    @retries(3)
    def confirmPassenger_get_token(self):
        """Step 6: load the order page and scrape the token and ticket form.

        Returns:
            True when the values were found and stored on the instance,
            False otherwise.
        """
        print("#############################Step6:confirmPassenger_get_token #########")
        url_confirm_passenger = "http://kyfw.12306.cn/otn/confirmPassenger/initDc"
        g_conn.request('GET', url_confirm_passenger, headers=self.proxy_ext_header)
        data = self._read_body(g_conn.getresponse())
        # Decode once and work on text; the original iterated raw bytes for
        # non-gzip replies, which yields integers rather than lines.
        parsed = self._parse_init_dc_page(data.decode('utf8', errors='replace'))
        if parsed is None:
            print("globalRepeatSubmitToken not found")
            return False
        self.globalRepeatSubmitToken, req_json = parsed
        print("Update globalRepeatSubmitToken=%s" % self.globalRepeatSubmitToken)
        print("line_request_info")
        self.key_check_isChange = req_json['key_check_isChange']
        self.leftTicketStr = req_json['leftTicketStr']
        print("Update key_check_isChange=%s" % self.key_check_isChange)
        return True

    @retries(3)
    def getQueueCount(self, item):
        """Step 10: ask the server for the queue status of the chosen train and seat.

        The seat type is the first entry of ``g_care_seat_types`` that has
        tickets; if none has, the last entry is used.
        """
        print("#############################Step:getQueueCount #########")
        url_queue_count = "http://kyfw.12306.cn/otn/confirmPassenger/getQueueCount"
        # Target format: 'Sun Jan 5 00:00:00 UTC+0800 2014'.
        # NOTE(review): this is built from the current date, not the travel
        # date in the query - confirm which one the server expects.
        tlist = time.ctime().split()
        tlist[3] = '00:00:00'
        tlist.insert(4, 'UTC+0800')
        buy_date = ' '.join(tlist)
        info = item['queryLeftNewDTO']
        t_type = None
        for t_type in g_care_seat_types:
            if info[t_type] != "--" and info[t_type] != u"无":
                break
        if t_type is None:
            print("getQueueCount error :%s" % "no seat type configured")
            return False
        s_type = g_seat_code_dict[t_type]
        data = [
            ("train_date", buy_date),
            ("train_no", info['train_no']),
            ("stationTrainCode", info['station_train_code']),
            ("seatType", s_type),
            ("fromStationTelecode", info['from_station_telecode']),
            ("toStationTelecode", info['to_station_telecode']),
            ("leftTicket", info['yp_info']),
            ("purpose_codes", "00"),
            ("_json_att", ''),
            ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
        ]
        print("send getQueueCount=====>")
        res_json = self._post_json(url_queue_count, data)
        print("recv getQueueCount:%s" % res_json)
        if res_json.get('status') != True:
            print("getQueueCount error :%s" % res_json)
            return False
        return True

    @retries(3)
    def checkOrderInfo(self):
        """Step 9: validate the passenger and captcha data for the order."""
        print("#############################Step9:checkOrderInfo #########")
        url_check_order = "http://kyfw.12306.cn/otn/confirmPassenger/checkOrderInfo"
        data = [
            ("cancel_flag", "2"),
            ("bed_level_order_num", "000000000000000000000000000000"),
            ("passengerTicketStr", self.passengerTicketStr),
            ("oldPassengerStr", self.oldPassengerStr),
            ("tour_flag", "dc"),
            ("randCode", self.rand_code),
            ("_json_att", ''),
            ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
        ]
        print("send checkOrderInfo=====>")
        res_json = self._post_json(url_check_order, data)
        print("recv checkOrderInfo:%s" % res_json)
        payload = self._payload(res_json)
        if res_json.get('status') != True or payload.get('submitStatus') != True:
            print("checkOrderInfo error :%s" % payload.get('errMsg'))
            return False
        return True

    @retries(3)
    def checkUser(self):
        """Step 4: confirm with the server that the session is still logged in."""
        print("#############################Step4:checkUser #########")
        url_check_info = "http://kyfw.12306.cn/otn/login/checkUser"
        data = [
            ('_json_att', ''),
        ]
        print(urllib.parse.urlencode(data))
        print("send checkUser=====>")
        res_json = self._post_json(url_check_info, data)
        print("recv checkUser")
        if self._payload(res_json).get('flag') != True:
            print("check user failed, %s" % res_json)
            return False
        return True

    @retries(3)
    def submitOrderRequest(self, item):
        """Step 5: open an order for the train described by *item*."""
        print("#############################Step5:submitOrderRequest #########")
        url_submit = "http://kyfw.12306.cn/otn/leftTicket/submitOrderRequest"
        info = item['queryLeftNewDTO']
        # NOTE(review): the body is concatenated without URL-encoding;
        # presumably secretStr arrives already percent-encoded - confirm.
        post_data = "secretStr=" + item['secretStr'] \
            + "&train_date=" + info['start_train_date'] \
            + "&back_train_date=" + info['start_train_date'] \
            + "&tour_flag=dc&purpose_codes=ADULT&query_from_station_name=" \
            + info['from_station_name'] \
            + "&query_to_station_name=" + info['to_station_name'] \
            + "&undefined"
        print(post_data)
        print("send submitOrderRequest=====>")
        g_conn.request('POST', url_submit, body=post_data.encode("utf8"), headers=self.proxy_ext_header)
        data = self._read_body(g_conn.getresponse())
        res_json = json.loads(data)
        if res_json.get('status') != True:
            print(u"submit order failed")
            print(data)
            # Print the text itself; encoding to gb2312 first only shows a
            # bytes repr on Python 3.
            print(''.join(res_json.get('messages') or []))
            return False
        return True

    @retries(3)
    def confirmSingleForQueue(self):
        """Step 11: put the order into the server-side queue."""
        print("#############################Step11:confirmSingleForQueue #########")
        url_check_info = "http://kyfw.12306.cn/otn/confirmPassenger/confirmSingleForQueue"
        data = [
            ('passengerTicketStr', self.passengerTicketStr),
            ("oldPassengerStr", self.oldPassengerStr),
            ('randCode', self.rand_code),
            ('purpose_codes', "00"),
            ('key_check_isChange', self.key_check_isChange),
            ('leftTicketStr', self.leftTicketStr),
            ('train_location', 'H2'),
            ('_json_att', ''),
            ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
        ]
        print("send confirmSingleForQueue=====>")
        res_json = self._post_json(url_check_info, data)
        print("recv confirmSingleForQueue")
        if self._payload(res_json).get('submitStatus') != True:
            print(u"confirmSingleForQueue failed, %s" % res_json)
            return False
        return True

    @retries(5)
    def queryOrderWaitTime(self):
        """Step 12: poll the queue until the wait count reaches 0, then store the order id.

        Returns:
            Always True, including when the server reports an error (the
            original behaviour; ``orderId`` then keeps its previous value).
        """
        print("#############################Step12:queryOrderWaitTime #########")
        url_base = "http://kyfw.12306.cn/otn/confirmPassenger/queryOrderWaitTime?"
        cnt = 0
        while 1:
            data = [
                ('random', int(time.time())),
                ("tourFlag", "dc"),
                ('_json_att', ''),
                ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
            ]
            # Build the URL from the base every time; the original appended
            # to the same variable, so the query string grew on each poll.
            url_query_wait = url_base + urllib.parse.urlencode(data)
            print("send queryOrderWaitTime:%d=====>" % cnt)
            g_conn.request('GET', url_query_wait, headers=self.proxy_ext_header)
            res_json = json.loads(self._read_body(g_conn.getresponse()))
            print("recv queryOrderWaitTime:%s" % res_json)
            cnt = cnt + 1
            payload = self._payload(res_json)
            # NOTE(review): the original tested for a 'data' key *inside*
            # res_json['data']; kept as written - confirm against a real reply.
            if 'data' not in payload or payload.get('queryOrderWaitTimeStatus') != True:
                print("queryOrderWaitTime error")
                print(res_json.get('messages'))
                break
            if payload['waitCount'] == 0:
                self.orderId = payload['orderId']
                print("Update orderId:%s" % self.orderId)
                break
            # Still queued: wait before polling again instead of busy-looping.
            time.sleep(g_query_sleep_time)
        return True

    @retries(3)
    def resultOrderForDcQueue(self):
        """Step 13: fetch the final result of the queued order."""
        print("#############################Step13:resultOrderForDcQueue #########")
        url_result = "http://kyfw.12306.cn/otn/confirmPassenger/resultOrderForDcQueue"
        data = [
            ('orderSequence_no', self.orderId),
            ('_json_att', ''),
            ("REPEAT_SUBMIT_TOKEN", self.globalRepeatSubmitToken),
        ]
        print("send resultOrderForDcQueue=====>")
        res_json = self._post_json(url_result, data)
        print("recv queryOrderWaitTime")
        if self._payload(res_json).get('submitStatus') != True:
            print("submit error")
            print(res_json)
            return False
        print("#############################Success check ticket in webbrowser #########")
        return True

    @retries(3)
    def get_passenger_info(self):
        """Step 7: request the passenger list (the reply is decoded but not used)."""
        print("#############################Step7:getPassengerDTOs #########")
        url_get_passager_info = "http://kyfw.12306.cn/otn/confirmPassenger/getPassengerDTOs"
        data = [
            ('_json_att', ''),
            ('REPEAT_SUBMIT_TOKEN', self.globalRepeatSubmitToken),
        ]
        print("send getPassengerDTOs=====>")
        self._post_json(url_get_passager_info, data)
        print("recv getPassengerDTOs")
        return True

    def buy(self, item):
        """Run the order steps for *item* in sequence.

        Returns:
            True when every step succeeds, False as soon as one fails.
        """
        # Step4
        if not self.checkUser():
            return False
        # Step5
        if not self.submitOrderRequest(item):
            return False
        # Step6
        if not self.confirmPassenger_get_token():
            return False
        self.proxy_ext_header["Referer"] = "http://kyfw.12306.cn/otn/confirmPassenger/initDc#nogo"
        # Step7 (get_passenger_info) is intentionally not called.
        # Step8
        if not self.check_rand_code():
            return False
        # Step9
        if not self.checkOrderInfo():
            return False
        # Step10
        if not self.getQueueCount(item):
            return False
        # Step11
        if not self.confirmSingleForQueue():
            return False
        # Step12
        if not self.queryOrderWaitTime():
            return False
        # Step13
        if not self.resultOrderForDcQueue():
            return False
        return True
def clean_temp_files():
    """Placeholder for temporary-file cleanup; currently only prints its name."""
    print("clean_temp_files")
##############################################test#############################
@retries(3)
def test_retries():
    """Manual check of the ``retries`` decorator: always raises NameError."""
    print("test")
    raise NameError  # swap for http.client.HTTPException to exercise the reconnect path
def test_ocr():
    """Manual check of ``call_tesseract`` against ``pass_code.jpeg`` in the current directory."""
    f_name = "pass_code.jpeg"
    text = call_tesseract(f_name)
    print("read:%s" % text)
@retries(3)
def test_reconnect():
    """Manual check: send three requests over ``g_conn``, restarting it after each.

    NOTE(review): the original indentation was lost; the restart is assumed
    to sit inside the loop, after each request.
    """
    header = {
        "Accept": "*/*",
        "X-Requested-With": "XMLHttpRequest",
        "Accept-Language": "zh-cn",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "Keep-Alive",
        "Cache-Control": "no-cache",
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    }
    url = "http://www.baidu.com"
    for i in range(3):
        print("send")
        g_conn.request('GET', url, headers=header)
        res = g_conn.getresponse()
        # Drain the reply so the connection can be reused.
        res.read()
        print("send")
        restart_conn(g_conn)
def test_get_svr_ips():
    """Placeholder; currently only prints its name."""
    print("test_get_svr_ips")
##############################################test#############################
def show_conf():
    """Print the configuration values the run will use."""
    print("########show conf##############")
    print("Buy:%s" % (','.join(g_buy_list)))
    print("Ingnore:%s" % (','.join(g_ingnore_list)))
    print("Query data:", g_query_data)
    print("Passengers:", g_passengers)
    print("Sleep time:%f" % g_query_sleep_time)
    print("Auto OCR: %d" % g_max_auto_times)
    print("\n")
def main():
    """Entry point: show the config, set up logging, log in and query until done.

    Returns:
        False if login fails, True once a query run completes.
    """
    show_conf()
    # Log warnings and errors to log.txt in the current directory. The
    # original path '.\log.txt' contained an invalid escape sequence and only
    # named the intended file on Windows.
    hdlr = logging.FileHandler(os.path.join('.', 'log.txt'))
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.WARNING)
    #test_retries()
    print("connecting......")
    g_conn.connect()
    ha = HttpAuto()
    ha.construct_passengerTicketStr()
    if not ha.loginAysnSuggest():
        return False
    # NOTE(review): the original indentation was lost. As written the loop
    # never ended and the final return was unreachable; it now stops once
    # query() reports completion and retries after a pause on errors.
    while 1:
        try:
            if ha.query():
                break
        except Exception:
            traceback.print_exc()
            time.sleep(g_query_sleep_time)
    return True
if __name__ == '__main__':
    # Manual checks, enable as needed:
    #test_ocr()
    #test_reconnect()
    main()

参考项目完整源码:

真的放不了地址,请私信我,已经打包好了。

如有需要早下载,可能会失效!


以上是全部内容,只是善于分享,不足之处请包涵!爬虫基本的原理就是,获取源码,进而获取网页内容。一般来说,只要你给一个入口,通过分析,可以找到无限个其他相关的你需要的资源,进而进行爬取。


我也写了很多其他的非常简单的入门级的爬虫详细教程,关注后,点击我的头像,就可以查看到。


欢迎大家一起留言讨论和交流,谢谢!

  • 2
    点赞
  • 20
    收藏
    觉得还不错? 一键收藏
  • 9
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 9
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值