经过前面两篇文章,已经准备得差不多了。
爬取携程机票信息:https://blog.csdn.net/weixin_42109012/article/details/96423081
获取携程城市缩写:https://blog.csdn.net/weixin_42109012/article/details/96378400
现在我们需要把之前直接复制过来的请求载荷,改成根据输入动态设置的请求载荷。
1、请求载荷分析
flightWay 是航班方式(例如单程 Oneway),不太好改,我就没有深入研究。
airportParams 是航班请求的关键,我们只想输入起点、终点、时间。
所以我们需要把城市的名称、缩写、编号关联起来。
2、从数据库中取出信息
这个很简单,没什么难度。注意:默认游标下 fetchall() 取出来的是元组(tuple),其中每一行也是一个元组,可以像 list 一样按下标访问。
def select(sql, params=None):
    """Run a query against the local ``携程`` MySQL database and return all rows.

    Args:
        sql: SQL statement to execute. It may contain ``%s`` placeholders
            when ``params`` is supplied.
        params: Optional values bound to the placeholders by the driver.
            Defaults to ``None``, in which case ``sql`` is executed as given.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.

    Raises:
        Any exception raised while connecting or executing propagates
        unchanged to the caller.
    """
    db = pymysql.connect(
        host="localhost",
        port=3308,
        user="root",
        password="123456",
        database="携程",
    )
    try:
        # The cursor is closed automatically when the ``with`` block exits.
        with db.cursor() as cur:
            cur.execute(sql, params)
            return cur.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        db.close()
# Demo: look up one city and show the whole result, its first row, and that
# row's first column.
if __name__ == "__main__":
    sql = "select * from city where CN='绵阳'"
    # Query once and reuse the result instead of opening three connections.
    rows = select(sql)
    print(rows)
    # Indexing an empty result would raise IndexError, so only drill down
    # when at least one row came back.
    if rows:
        print(rows[0])
        print(rows[0][0])
3、修改请求载荷
# Ask for the route and date, then look up the code and id of each city.
dcityname = input("请输入出发地:")
acityname = input("请输入目的地:")
date = input("请输入时间(格式:2008-08-08):")
# NOTE(review): other snippets in this file query a table named `city`;
# confirm which table name is correct for your database.
sql1 = "select * from t_city where CN='%s'" % dcityname
sql2 = "select * from t_city where CN='%s'" % acityname
# Use the select() helper defined in this file and fetch each row once;
# columns 2 and 3 are used as the city code and the city id.
drow = select(sql1)[0]
arow = select(sql2)[0]
dcity, dcityid = drow[2], drow[3]
acity, acityid = arow[2], arow[3]
print(dcity, dcityname, dcityid, acity, acityname, acityid)

url = "https://flights.ctrip.com/itinerary/api/12808/products"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0",
    "Referer": "https://flights.ctrip.com/itinerary/oneway/{}-{}?date={}".format(dcity, acity, date),
    "Content-Type": "application/json",
}
request_payload = {
    "flightWay": "Oneway",
    "classType": "ALL",
    "hasChild": False,
    "hasBaby": False,
    "searchIndex": 1,
    "airportParams": [
        {"dcity": dcity, "acity": acity, "dcityname": dcityname, "acityname": acityname, "date": date, "dcityid": dcityid, "acityid": acityid}
        # Example entry: {"dcity": "SHA", "acity": "BJS", "dcityname": "上海", "acityname": "北京", "date": "2019-07-23", "dcityid": 2, "acityid": 1}
    ],
}
整体代码
import requests
import json
import pymysql
def select(sql, params=None):
    """Run a query against the local ``携程`` MySQL database and return all rows.

    Args:
        sql: SQL statement to execute. It may contain ``%s`` placeholders
            when ``params`` is supplied.
        params: Optional values bound to the placeholders by the driver.
            Defaults to ``None``, in which case ``sql`` is executed as given.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.

    Raises:
        Any exception raised while connecting or executing propagates
        unchanged to the caller.
    """
    db = pymysql.connect(
        host="localhost",
        port=3308,
        user="root",
        password="123456",
        database="携程",
    )
    try:
        # The cursor is closed automatically when the ``with`` block exits.
        with db.cursor() as cur:
            cur.execute(sql, params)
            return cur.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        db.close()
def _lookup_city(cityname):
    """Return the first ``t_city`` row whose ``CN`` equals *cityname*, or ``None``."""
    # NOTE(review): the name is interpolated directly into the SQL text;
    # prefer bound parameters if select() accepts them. Other snippets in
    # this file query a table named `city` -- confirm the table name.
    rows = select("select * from t_city where CN='%s'" % cityname)
    return rows[0] if rows else None


def FlightInfo():
    """Prompt for a route and date, query Ctrip, and print the direct flights.

    Reads the departure city, arrival city and date from standard input,
    resolves both cities through the database, posts a one-way search to the
    Ctrip products endpoint and prints one line per route that has exactly
    one leg. Prints a message and returns early when a city is unknown or no
    routes come back.

    Returns:
        None.
    """
    dcityname = input("请输入出发地:")
    acityname = input("请输入目的地:")
    date = input("请输入时间(格式:2008-08-08):")

    # One lookup per city; columns 2 and 3 are used as city code and city id.
    drow = _lookup_city(dcityname)
    arow = _lookup_city(acityname)
    if drow is None or arow is None:
        print("没有找到对应的城市!")
        return
    dcity, dcityid = drow[2], drow[3]
    acity, acityid = arow[2], arow[3]
    print(dcity, dcityname, dcityid, acity, acityname, acityid)

    url = "https://flights.ctrip.com/itinerary/api/12808/products"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0",
        "Referer": "https://flights.ctrip.com/itinerary/oneway/{}-{}?date={}".format(dcity, acity, date),
        "Content-Type": "application/json",
    }
    request_payload = {
        "flightWay": "Oneway",
        "classType": "ALL",
        "hasChild": False,
        "hasBaby": False,
        "searchIndex": 1,
        "airportParams": [
            {"dcity": dcity, "acity": acity, "dcityname": dcityname, "acityname": acityname, "date": date, "dcityid": dcityid, "acityid": acityid}
            # Example entry: {"dcity": "SHA", "acity": "BJS", "dcityname": "上海", "acityname": "北京", "date": "2019-07-23", "dcityid": 2, "acityid": 1}
        ],
    }

    # POST the search; the timeout keeps the script from hanging forever on
    # an unresponsive server.
    response = requests.post(
        url, data=json.dumps(request_payload), headers=headers, timeout=10
    ).text

    # A missing or null "data" member is treated the same as "no routes".
    data = json.loads(response).get('data') or {}
    routeList = data.get('routeList')
    if not routeList:
        print("没有相关航班!")
        return

    for route in routeList:
        # Keep direct flights only: routes with exactly one leg.
        legs = route.get('legs') or []
        if len(legs) != 1:
            continue
        flight = legs[0].get('flight')
        if not flight:
            continue
        departure = flight.get('departureAirportInfo') or {}
        arrival = flight.get('arrivalAirportInfo') or {}
        # Pull out the fields to display.
        airlineName = flight.get('airlineName')
        flightNumber = flight.get('flightNumber')
        departureDate = flight.get('departureDate')
        arrivalDate = flight.get('arrivalDate')
        departureCityName = departure.get('cityName')
        departureAirportName = departure.get('airportName')
        arrivalCityName = arrival.get('cityName')
        arrivalAirportName = arrival.get('airportName')
        print(departureCityName, departureAirportName, "\t",
              arrivalCityName, arrivalAirportName, "\t",
              departureDate, arrivalDate, "\t",
              airlineName, flightNumber)
# Script entry point: start the interactive flight lookup only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    FlightInfo()
效果
总结
我在想,这样做和直接在网页上搜索有什么不一样?其实是一样的!
最后,我觉得其实可以利用数据库中的编号批量生成查询(一共 266 个城市):用两个 for 循环分别遍历出发地和目的地,跳过出发地与目的地相同的情况即可;再多开几个线程(CPU 有几核就开几个),只需输入日期,就可以把当天所有航线的信息都提取出来。
date = input("请输入时间(格式:2008-08-08):")

# Look every city up once (ids 1..266) instead of six queries per city pair.
# Ids that return no row are skipped rather than raising IndexError.
cities = {}
for city_id in range(1, 267):
    rows = select("select * from city where id=%d" % city_id)
    if rows:
        cities[city_id] = rows[0]

for i, drow in cities.items():
    # Columns 1, 2 and 3 are used as city name, city code and city id.
    dcityname, dcity, dcityid = drow[1], drow[2], drow[3]
    for j, arow in cities.items():
        if i == j:
            # Same city on both ends: skip this pair. Reassigning the loop
            # variable would not skip anything.
            continue
        acityname, acity, acityid = arow[1], arow[2], arow[3]
        request_payload = {
            "flightWay": "Oneway",
            "classType": "ALL",
            "hasChild": False,
            "hasBaby": False,
            "searchIndex": 1,
            "airportParams": [
                {"dcity": dcity, "acity": acity, "dcityname": dcityname, "acityname": acityname, "date": date,
                 "dcityid": dcityid, "acityid": acityid}
                # Example entry: {"dcity": "SHA", "acity": "BJS", "dcityname": "上海", "acityname": "北京", "date": "2019-07-23", "dcityid": 2, "acityid": 1}
            ]
        }