12306登录python_python爬虫 -- 12306登录刷票

最新推荐文章于 2022-07-21 01:16:12 发布

weixin_39928102

最新推荐文章于 2022-07-21 01:16:12 发布

阅读量140

点赞数

文章标签： 12306登录python

# -*- coding: utf-8 -*-

from splinter.browser import Browser

from time import sleep

import traceback

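# Setup notes
# - If you see "Message: 'phantomjs' executable needs to be in PATH.", the browser
#   driver executable was not found on PATH.
# - geckodriver and phantomjs are not bundled; download them yourself and put them
#   on PATH, for example:
#       sudo mv geckodriver /usr/local/bin/
#
# Still needed:
# 1. Automatic captcha recognition (text, pictures, ...).
# 2. Options to choose the train type (G / D / C), window or aisle seat,
#    second class or first class.
# 3. The 12306 captcha is a JPG. Fetching it by URL gives a different picture from
#    the one shown on the login page, because it is generated from a random factor,
#    so it has to be parsed from a screenshot instead.
#    Text recognition: map the recognised text to the pictures inside the image,
#    work out their positions, then trigger click events on them.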

# Initial settings

# 12306 account name and password
username = u"username@mail.cn"
passwd = u"yourpassword"

# Station cookie values, copied from the browser (Page Info -> Security -> View Cookies).
# Each value is the %u-escaped station name, then %2C, then a three-letter code.
shenzhen = u"%u6DF1%u5733%2CSZQ"  # Shenzhen
wuhan = u"%u6B66%u6C49%2CWHN"  # Wuhan
yueyangdong = u"%u5CB3%u9633%u4E1C%2CYIQ"  # Yueyang East
#shanghai = u"%u4E0A%u6D77%2CSHH"  # Shanghai
#taiyuan = u"%u592A%u539F%2CTYV"  # Taiyuan
#hangzhou = u"%u676D%u5DDE%u4E1C%2CHGH"  # Hangzhou East

# Values written to the _jc_save_fromStation / _jc_save_toStation cookies
starts = shenzhen
ends = yueyangdong

# Travel date, formatted like 2018-01-02 (written to the _jc_save_fromDate cookie)
dtime = u"2018-02-10"

# Which train to book: the N-th "book" button in the result list (1-based).
# 0 means click every "book" button from top to bottom.
order = 0

# Passenger name. If the passenger is yourself, the page also has a link with your
# own name that jumps to the personal centre, so use index [1];
# if the passenger is someone else, use index [0].
# NOTE(review): only referenced by commented-out code in the visible part of the file.
pa = u"张三"

# Passenger names to tick on the order page
users = [u"张三", u"李四"]

# URLs used by the script. The bare string on the next line is the original
# section label ("URLs"); it is an expression statement with no effect.
"""网址"""
ticket_url = "https://kyfw.12306.cn/otn/leftTicket/init"
# NOTE(review): login_url is not referenced in the visible part of the file.
login_url = "https://kyfw.12306.cn/otn/login/init"
initmy_url = "https://kyfw.12306.cn/otn/index/initMy12306"

def login():
    """Fill in the 12306 login form and wait for the user to solve the captcha.

    Uses the module-level browser ``b`` and the ``username`` / ``passwd``
    settings. The captcha is not solved automatically: the function blocks,
    polling once per second, until the browser URL equals ``initmy_url``.
    """
    # Clicking "login" on the ticket page jumps to the login page.
    b.find_by_text(u"登录").click()
    sleep(3)

    # Fill the form fields by their ``name`` attribute, pausing after each one.
    # (Looking a field up by id, e.g. b.find_by_id('username').fill(username),
    # would work as well.)
    credentials = (
        ("loginUserDTO.user_name", username),
        ("userDTO.password", passwd),
    )
    for field_name, field_value in credentials:
        b.fill(field_name, field_value)
        sleep(1)

    # The captcha has to be entered by hand. Automatic recognition is still an
    # open question: (a) how to obtain the image (a screenshot, since fetching
    # it by URL apparently does not give the same picture; or via
    # cookies/session?), and (b) how to parse it (by position, or real image
    # recognition?). Candidates to look into: tesseract-ocr / pytesseract, CNNs.
    print(u"等待验证码，自行输入...")

    # Wait until the page has moved on, i.e. the user has entered the captcha
    # and submitted the form.
    while b.url != initmy_url:
        sleep(1)

def huoche():
    """Open 12306 in Firefox, log in, then keep querying until a ticket is booked.

    Creates the module-level browser ``b``, visits ``ticket_url`` and calls
    ``login()`` while the page still shows the "login" text. It then stores the
    route and date (``starts``, ``ends``, ``dtime``) in cookies, filters for
    high-speed/intercity trains and clicks "query" once per second until the
    browser leaves ``ticket_url``. After each query it clicks the "book" button
    selected by ``order`` (the N-th button, or every button when ``order`` is
    0). Finally it ticks the passengers in ``users`` and submits the order.

    Errors raised after login are printed as a traceback and not re-raised.
    """
    global b
    # b = Browser(driver_name="firefox", executable_path="/usr/bin/firefox")
    b = Browser(driver_name="firefox")
    print(u"访问12306...")
    b.visit(ticket_url)

    # While the page still shows the "login" text, run the login flow.
    # The original comments describe calling login() here, but the call itself
    # was missing, which left login() as dead code.
    while b.is_text_present(u"登录"):
        sleep(1)
        login()
        if b.url == initmy_url:
            break

    # Login succeeded; carry on with the booking flow.
    try:
        print(u"购票页面...")
        # Go back to the ticket page.
        b.visit(ticket_url)

        # Load the query settings through cookies, then reload so they apply.
        b.cookies.add({"_jc_save_fromStation": starts})
        b.cookies.add({"_jc_save_toStation": ends})
        b.cookies.add({"_jc_save_fromDate": dtime})
        b.reload()
        sleep(2)

        # Extra filter: high-speed / intercity trains only.
        b.find_by_text(u"GC-高铁/城际").click()

        count = 0
        # Still being on the ticket page means no ticket has been grabbed yet,
        # so keep refreshing the query and trying to book.
        while b.url == ticket_url:
            b.find_by_text(u"查询").click()
            count += 1
            print(u"循环点击查询... 第 %s 次" % count)
            sleep(1)
            try:
                book_buttons = b.find_by_text(u"预订")
                if order != 0:
                    # Book the chosen train only (order is 1-based).
                    book_buttons[order - 1].click()
                else:
                    # order == 0: try every train from the first one down.
                    for button in book_buttons:
                        button.click()
            except Exception:
                # Not a bare ``except`` so that Ctrl-C can still stop the loop.
                print(u"还没开始预订")

        sleep(1)
        print(u"开始选择用户...")
        for user in users:
            b.find_by_text(user).last.click()

        print(u"提交订单...")
        sleep(1)
        b.find_by_text(u"提交订单").click()
        sleep(0.5)
        # NOTE: the original author marked this confirmation step as needing
        # improvement.
        b.find_by_text(u"确认").click()

        # Alternative once the loop is done: pick the contact by name.
        # b.find_by_text(pa)[0].click()
        print(u"能做的都做了.....不再对浏览器进行任何操作")
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print().
        traceback.print_exc()

# Start the booking flow only when this file is run as a script.
if __name__ == "__main__":
    huoche()

# -*- coding: utf-8 -*-

# 仅可用来爬取数据后来进行测试分析

#import ssl

import urllib2

import time

# Download 12306 login captcha images one after another and save them as
# CodePng20180119-<n>.jpg in the current directory. Runs until interrupted.
i = 1
while True:
    print("开始捕获第%s张图片" % str(i))

    # Disabled workaround kept from the original; it turns off HTTPS
    # certificate verification:
    #if hasattr(ssl, '_create_unverified_context'):
    #    ssl._create_default_https_context = ssl._create_unverified_context
    # Disabled alternative request with the full query string:
    #req = urllib2.Request("https://kyfw.12306.cn/passport/captcha/captcha-image?login_site=E&module=login&rand=sjrand&0.03777190844118017")
    req = urllib2.Request("https://kyfw.12306.cn/passport/captcha/captcha-image?*")

    u = urllib2.urlopen(req)
    try:
        data = u.read()
    finally:
        # Close the response explicitly so the connection is not leaked.
        u.close()
    print("捕获中..........")

    with open("CodePng20180119-" + str(i) + ".jpg", 'wb') as f:
        f.write(data)
    # Pause between requests, after the file has been written and closed.
    time.sleep(2)

    i += 1
    print("捕获结束!")

# On Python 3, use urllib.request instead of urllib2.

补充：上面第二段脚本用于批量抓取验证码图片，作为今后实现自动识别时的数据来源。

weixin_39928102

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
12306登录python_python爬虫 -- 12306登录刷票

# -*- coding: utf-8 -*-from splinter.browser import Browserfrom time import sleepimport traceback# Message: 'phantomjs' executable needs to be in PATH.# geckodriver 要自行下载# phantomjs 自行下载# we nee...
复制链接

扫一扫