1 importrequests2 importurllib3 importtime4 #import pymongo
5
6 #client=pymongo.MongoClient('localhost',27017)
7 #book_qunar=client['qunar']
8 #sheet_qunar_zyx=book_qunar['qunar_zyx']
9
# Fetch the product list
# URL template for the Qunar free-travel product list endpoint.
# ``dep`` and ``query`` must already be percent-encoded; ``offset`` is the
# index of the first route on the requested page (the page size is fixed
# at 20 by the ``limit=<offset>,20`` parameter).
_LIST_URL_TEMPLATE = (
    'https://touch.dujia.qunar.com/list?modules=list,bookingInfo'
    '&dep={dep}&query={query}&mtype=all&ddt=false'
    '&mobFunction=%E6%89%A9%E5%B1%95%E8%87%AA%E7%94%B1%E8%A1%8C'
    '&cfrom=zyx&it=FreetripTouchin&et=FreetripTouch&date='
    '&configDepNew=&needNoResult=true&originalquery={query}'
    '&limit={offset},20&includeAD=true&qsact=search'
)
_LIST_PAGE_SIZE = 20


def get_list(dep, item):
    """Fetch and print every result page for one departure/destination pair.

    The first page is requested to learn the total ``routeCount``; the
    remaining pages are then requested in steps of 20 routes.  One record
    per page is printed (the MongoDB insert is currently disabled).

    Args:
        dep: Departure city name (unencoded text).
        item: Destination name (unencoded text); sent as both ``query``
            and ``originalquery``.

    Returns:
        None.  Returns early, without printing anything, when the first
        response does not contain a usable ``data.limit.routeCount``.
    """
    # Imported here explicitly: the file only does ``import urllib``, which
    # does not guarantee that the ``urllib.parse`` submodule is loaded.
    from urllib.parse import quote

    # Percent-encode once; the values are the same for every page.
    dep_quoted = quote(dep)
    item_quoted = quote(item)

    def page_url(offset):
        return _LIST_URL_TEMPLATE.format(
            dep=dep_quoted, query=item_quoted, offset=offset)

    first_page = get_json(page_url(0))
    try:
        route_count = int(first_page['data']['limit']['routeCount'])
    except (KeyError, TypeError, ValueError):
        # Missing/None/non-numeric count: nothing to crawl for this pair.
        return

    for limit in range(0, route_count, _LIST_PAGE_SIZE):
        # The offset-0 URL is identical to the probe request above, so reuse
        # that response instead of downloading the same page twice.
        page = first_page if limit == 0 else get_json(page_url(limit))
        result = {
            'date': time.strftime('%Y-%m-%d', time.localtime(time.time())),
            'dep': dep,
            'arrive': item,
            'limit': limit,
            'result': page,
        }
        # Persisting to MongoDB is currently disabled:
        # sheet_qunar_zyx.insert_one(result)
        print(result)
34 #def connect_mongo():
35 #client=pymongo.MongoClient('localhost',27017)
36 #book_qunar=client['qunar']
37 #return book_qunar['qunar_zyx']
38
39
def get_json(url, timeout=10):
    """Send an HTTP GET to *url* and return the response body decoded as JSON.

    Args:
        url: Fully-formed URL to request.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection, which would freeze the whole crawl.

    Returns:
        The decoded JSON document (whatever ``Response.json()`` yields).

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
        ValueError: If the response body is not valid JSON (raised by
            ``Response.json()``).
    """
    response = requests.get(url, timeout=timeout)
    # Pause one second after every request -- presumably to avoid hammering
    # the remote server.
    time.sleep(1)
    return response.json()
def _iter_destination_queries(arrive_dict):
    """Yield the ``query`` value of every item in an arriveRecommend reply."""
    for module in arrive_dict['data']:
        for sub_module in module['subModules']:
            for entry in sub_module['items']:
                yield entry['query']


def main():
    """Crawl the product lists for every departure city and destination.

    Downloads the departure-city index, then for each departure city
    downloads its recommended destinations, removes duplicates (keeping
    first-seen order) and calls ``get_list`` for every remaining pair.
    """
    # Imported here explicitly: the file only does ``import urllib``, which
    # does not guarantee that the ``urllib.parse`` submodule is loaded.
    from urllib.parse import quote

    url = 'https://touch.dujia.qunar.com/depCities.qunar'
    dep_dict = get_json(url)
    # The reply is JSON with one extra level of nesting: 'data' maps group
    # keys to lists of departure cities.
    for dep_group in dep_dict['data'].values():
        for dep in dep_group:
            # Decoding the request parameters shows that ``dep`` is the
            # departure city, while ``query``/``originalquery`` are the
            # destination.
            url = 'https://m.dujia.qunar.com/golfz/sight/arriveRecommend?dep={}&exclude=&extensionImg=255,175'.format(quote(dep))
            arrive_dict = get_json(url)
            # De-duplicate destinations; dict keys keep insertion order and
            # give O(1) membership checks.
            destinations = dict.fromkeys(_iter_destination_queries(arrive_dict))
            for item in destinations:
                get_list(dep, item)


if __name__ == "__main__":
    main()