接上一篇文章用 Qt 制作了爬票的 GUI 程序后,接下来实现抓取相关数据并展示在 GUI 程序中。在完成该功能前,先介绍一下 Python 爬虫经常用到的知识点。
import urllib.parse #urllib库为python3自带的库,无须安装
import urllib.request
import urllib3 #需要python3中使用pip install urllib3 进行安装
import requests
from requests.exceptions import ReadTimeout,HTTPError,RequestException
from bs4 import BeautifulSoup #提取html内容的库
# Example usage of urllib: POST url-encoded form data and print the reply.
encoded = urllib.parse.urlencode({'word': 'hello'})
data = bytes(encoded, encoding='utf8')
response = urllib.request.urlopen('http://httpbin.org/post', data=data)
print(response.read().decode('utf8'))
# Example usage of urllib3: issue a GET request through a PoolManager.
http = urllib3.PoolManager()
url = 'http://www.baidu.com'
response = http.request('GET', url)
print(response.data.decode('utf8'))
# Example of requests.get carrying a User-Agent header and a client cookie
# (sites often block bare requests; a browser-like header plus a cookie
# captured from the browser gets around that).
request_headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'}
request_cookies = {
    'STOKEN_BFESS': '56b3ea70e2d3a03ac3fe0b80145ec3c09d86b69e56d6dbdb1d0501377926de53'}
response = requests.get(url, headers=request_headers, cookies=request_cookies)
# A plain GET, then dump the interesting attributes of the response.
resp = requests.get('http://www.baidu.com')
for value in (resp.status_code, resp.url, resp.headers,
              resp.cookies, resp.text, resp.content.decode('utf8')):
    print(value)
# Example of requests.post: submit form data and dump the response fields.
form_data = {
    'username': 'hello'}
resp = requests.post('http://httpbin.org/post', data=form_data)
for value in (resp.status_code, resp.url, resp.headers,
              resp.cookies, resp.text, resp.content.decode('utf8')):
    print(value)
# A minimal BeautifulSoup example: fetch a page, parse it with lxml,
# pretty-print the tree, then pull out the <title> text.
news_page = requests.get('http://news.baidu.com')
soup = BeautifulSoup(news_page.text, features='lxml')
print(soup.prettify())
print(soup.find('title').text)
上面的示例代码主要介绍了 Python 爬虫常用到的一些模块,其中 requests 用法示例中带有 header 和 cookie 相关的使用介绍。因为在实际项目中爬取数据经常会被网站屏蔽,所以带上 header 头和客户端 cookie 可以解决这一问题;cookie 的获取方式可以在浏览器开发者工具中查看。
上一篇文章介绍了使用Qt Designer工具制作爬票gui,运行示例如下图
定义一组函数获取 12306 所有车站名称,文件命名为 get_stations.py,并把获取到的车站名称保存到文件中。
import re
import requests
import os
def getStation():
    """Fetch the 12306 station-name table and persist it to disk.

    Downloads the station_name.js resource, extracts (Chinese name,
    telecode) pairs with a regex, and writes the resulting mapping to
    'stations.text' via write() so the GUI can translate a typed
    station name into the query parameter offline.
    """
    # Endpoint that serves the full station list as one JavaScript string.
    url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9050'
    response = requests.get(url, verify=True)  # verify the TLS certificate
    # Each station appears as ...|汉字名|TELECODE|...; capture name + code.
    # Raw string avoids the invalid-escape warning of the original pattern.
    stations = re.findall(r'([\u4e00-\u9fa5]+)\|([A-Z]+)', response.text)
    # BUG FIX: dict() accepts no 'indent' keyword — the original line
    # `dict((stations), indent=4)` raised TypeError at runtime.
    stations = dict(stations)
    # Serialize with str() so read() can eval() it back into a dict.
    write(str(stations))
def write(stations):
    """Write the serialized station mapping to 'stations.text'.

    stations: str — the station dict rendered with str(); read() later
    eval()s this text back into a dict.
    """
    # Context manager guarantees the handle is closed even if write()
    # raises; utf_8_sig prepends a BOM so Windows editors open it cleanly.
    with open('stations.text', 'w', encoding='utf_8_sig') as file:
        file.write(stations)
def read():
    """Return the first line of 'stations.text' (the serialized dict).

    write() stores the whole mapping as a single str(dict) line, so one
    readline() recovers all of it.
    """
    # Open for READING (the original comment wrongly said write mode);
    # the context manager closes the file on every exit path.
    with open('stations.text', 'r', encoding='utf_8_sig') as file:
        return file.readline()
def isStations():
    """Return True if the cached station file 'stations.text' exists."""
    # Return directly — the original bound the result to a local that
    # shadowed the function's own name.
    return os.path.exists('stations.text')
接下来在 query_requests.py 文件中定义函数获取站点相关车次信息,并加载 get_stations.py 中的方法;查看 12306 站点获取车次的相关接口,得到结果后分析并组装相关数据。
import requests
from get_stations import *
data = [] # module-level buffer holding the assembled train-run rows for the current query
type_data = [] # holds the final rows after the trains have been classified by type
def query(date, from_station, to_station):
data.clear() # 清空数据
type_data.clear() # 清空车次分类保存的数据
# 查询请求地址
url = 'https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date={}&leftTicketDTO.from_station={}&leftTicketDTO.to_station={}&purpose_codes=ADULT'.format(
date, from_station, to_station)
# 发送查询请求
heaed = {
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'}
cookies = {
'_uab_collina': '160557808500061392861085', 'JSESSIONID': '4529B254DD1DA81A87A8C51E37B0E226',
'RAIL_DEVICEID': 'QJecep2XuCpZ9e7kdVkYpbWmhpiRe6Czk1alTKJSUyNu9KNK8v9USaqbZOK4Nx4uRH8TLzrLzuvwjirgmdYXhYurvlG4m0UWJPEP8Wg2BHOfJ3BUj6UlHxUs01_8cwsmJWIGBKw7k_7c_yqerJ7fq4JOFQUfoiPu'}
response = requests.get(url, headers=heaed, cookies=cookies)
# # 将json数据转换为字典类型,通过键值对取数据
result = response.json()
result = result['data']['result']
if isStations() == True:
stations = eval(read())