背景:
由于要买机票,需要反复搜索航班信息,而爬虫可以帮我自动完成这件事;
解释得超级详细。
于是通过这一过程,我对爬虫有了基本的了解;
- 查询 上海 到 西安 04.29~05.02 的机票:
#coding:utf-8
import urllib2
from lxml import etree
import json
import random
import sys
# Python 2 only: reload the sys module so that setdefaultencoding can be
# called, then switch the interpreter's default string encoding to UTF-8.
# NOTE(review): this changes implicit str/unicode conversion process-wide.
reload(sys)
sys.setdefaultencoding('utf8')
def get_json2(date, rk, CK, r, max_price=600):
    '''Fetch flight data from the constructed URL and print the cheap flights.

    Queries Ctrip's domestic flight search endpoint for the route
    DCity1=SHA -> ACity1=SIA on ``date``, decodes the JSON response and
    prints one line per entry of the response's ``fis`` list whose ``lp``
    value is below ``max_price``.

    Args:
        date: Value substituted into the ``DDate1`` query parameter and the
            ``ddate1`` part of the Referer header (e.g. '2017-04-29').
        rk: Value of the ``rk`` query parameter.
        CK: Value of the ``CK`` query parameter.
        r: Value of the ``r`` query parameter.
        max_price: Exclusive upper bound for ``lp``; defaults to 600, the
            threshold that used to be hard-coded.

    Returns:
        None. Matching entries are written to stdout as "lp dt at".

    Raises:
        urllib2.URLError: If the HTTP request fails.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response has no ``fis`` key, or an entry lacks
            ``lp``, ``dt`` or ``at``.
    '''
    # NOTE(review): rk, CK and r look like anti-crawler tokens expected by
    # the endpoint -- confirm against the caller that builds them.
    url = (
        "http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights"
        "?DCity1=SHA&ACity1=SIA&SearchType=S&DDate1=%s"
        "&IsNearAirportRecommond=0&rk=%s&CK=%s&r=%s"
    ) % (date, rk, CK, r)
    headers = {
        'Host': "flights.ctrip.com",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0",
        # The Referer carries the same date as the query itself.
        'Referer': "http://flights.ctrip.com/booking/hrb-sha-day-1.html?ddate1=%s" % date,
    }
    req = urllib2.Request(url, headers=headers)
    res = urllib2.urlopen(req)
    try:
        content = res.read()
    finally:
        # Python 2 responses are not context managers; close explicitly so
        # the underlying socket is released even if read() fails.
        res.close()
    dict_content = json.loads(content, encoding="gb2312")
    # presumably lp = lowest price, dt = departure time, at = arrival time
    # -- TODO confirm against the API response.
    for flight in dict_content['fis']:
        if flight[u'lp'] < max_price:
            print("%s %s %s" % (flight[u'lp'], flight[u'dt'], flight[u'at']))
def</