利用post-data来构造信息抓取艺龙酒店

在 POST 请求中,有一种利用 form data(表单数据)来实现翻页、搜索的方法,下面以艺龙为例。/20180112

其实,还可以按酒店类型筛选(例如如家、汉庭等):这些筛选条件同样放在 data 中,方法是一样的。/20180115

# coding: utf-8
__author__ ='姜枫渔火'
import requests, re, time, random, pandas
from fake_useragent import UserAgent

def getOnePage(url, timeout=10):
    """POST the search form to ``url`` and return the decoded JSON reply.

    The form fields and the request headers are not parameters: they are
    read from the module-level names ``data`` and ``headers``, which the
    caller has to assign before calling this function.

    Args:
        url: Endpoint the form is posted to.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the run forever.

    Returns:
        The response body decoded by ``Response.json()``.

    Raises:
        requests.exceptions.Timeout: The server did not answer in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
        ValueError: The body is not valid JSON.
    """
    res = requests.post(url, data=data, headers=headers, timeout=timeout)
    # Fail here with a clear HTTP error rather than later with an obscure
    # JSON-decoding or missing-key error on an error page.
    res.raise_for_status()
    return res.json()

def prasePage(html):
    """Extract ``(hotel name, price)`` pairs from one search-result payload.

    ``html`` is the decoded JSON reply; the listing markup is read from
    ``html['value']['hotelListHtml']``. Names are taken from ``title="..."``
    attributes that are immediately followed by ``<span``, prices from the
    text of ``h_pri_num`` spans. The two match lists are paired by position,
    the resulting list is printed and then returned.

    Raises:
        IndexError: Fewer prices than names were found. Surplus prices are
            ignored.
    """
    listing = html['value']['hotelListHtml']
    names = re.findall(r'title="(.+?)"><span', listing)
    prices = re.findall(r'class="h_pri_num ">(.*?)</span>', listing)
    # Pairing is driven by the name list, so every name needs a price at
    # the same position.
    pairs = [(name, prices[pos]) for pos, name in enumerate(names)]
    print(pairs)
    return pairs

def writeToFile(data):
    """Append ``data`` as rows to ``艺龙.csv`` in the working directory.

    ``data`` is turned into a ``pandas.DataFrame`` and written without a
    header row and without the index column. The file is opened in append
    mode, so repeated calls accumulate rows. Progress is reported on stdout
    before and after the write.
    """
    frame = pandas.DataFrame(data)
    print('writing')
    frame.to_csv(
        '艺龙.csv',
        mode='a+',
        index=False,
        header=False,
    )
    print("done")

if __name__ == '__main__':
    # Interactive entry point: ask for city and stay dates, then fetch,
    # parse and append result pages 1-20 to the CSV file, pausing a random
    # 1-4 seconds between pages.
    city = input("请输入待查询城市:")
    check_in = input("请输入入住时间(xxxx-xx-xx):")
    check_out = input("请输入离开时间(xxxx-xx-xx):")

    url = 'http://hotel.elong.com/ajax/list/asyncsearch'

    # getOnePage() reads the module-level names `data` and `headers`, so
    # the form and header dicts must keep exactly these names.
    #
    # NOTE(review): cityID is fixed to "0101" and the Referer points at
    # /beijing/ while cityName comes from user input -- presumably the ID
    # has to match the city; confirm before querying other cities. The two
    # token fields are hard-coded as well and may need refreshing.
    data = {
        "code": "7140144",
        "listRequest.areaID": "",
        "listRequest.bookingChannel": "1",
        "listRequest.cardNo": "192928",
        "listRequest.checkInDate": check_in + " 00:00:00",    # check-in date
        "listRequest.checkOutDate": check_out + " 00:00:00",  # check-out date
        "listRequest.cityID": "0101",
        "listRequest.cityName": city,    # e.g. Beijing
        "listRequest.customLevel": "11",
        "listRequest.distance": "20",
        "listRequest.endLat": "0",
        "listRequest.endLng": "0",
        "listRequest.facilityIds": "",
        "listRequest.highPrice": "0",
        "listRequest.hotelBrandIDs": "",
        "listRequest.isAdvanceSave": "false",
        "listRequest.isAfterCouponPrice": "true",
        "listRequest.isCoupon": "false",
        "listRequest.isDebug": "false",
        "listRequest.isLimitTime": "false",
        "listRequest.isLogin": "false",
        "listRequest.isMobileOnly": "true",
        "listRequest.isNeed5Discount": "true",
        "listRequest.isNeedNotContractedHotel": "false",
        "listRequest.isNeedSimilarPrice": "false",
        "listRequest.isReturnNoRoomHotel": "true",
        "listRequest.isStaySave": "false",
        "listRequest.isTrace": "false",
        "listRequest.isUnionSite": "false",
        "listRequest.keywords": "",
        "listRequest.keywordsType": "0",
        "listRequest.language": "cn",
        "listRequest.listType": "0",
        "listRequest.lowPrice": "0",
        "listRequest.orderFromID": "50",
        "listRequest.pageIndex": "1",    # overwritten for every page below
        "listRequest.pageSize": "20",
        "listRequest.payMethod": "0",
        "listRequest.personOfRoom": "0",
        "listRequest.poiId": "0",
        "listRequest.promotionChannelCode": "0000",
        "listRequest.proxyID": "ZD",
        "listRequest.rankType": "0",
        "listRequest.returnFilterItem": "true",
        "listRequest.sellChannel": "1",
        "listRequest.seoHotelStar": "0",
        "listRequest.sortDirection": "1",
        "listRequest.sortMethod": "1",
        "listRequest.starLevels": "",
        "listRequest.startLat": "0",
        "listRequest.startLng": "0",
        "listRequest.taRecommend": "false",
        "listRequest.themeIds": "",
        "listRequest.ctripToken": "1c06a555-04ce-4884-aa05-e6f92ad0e84e",
        "listRequest.elongToken": "jc94shhj-d5a1-4092-8060-828b168dbb61",
    }

    user_agents = UserAgent()
    # No fixed Content-Length here: the body size changes with the city,
    # the dates and the page number, and requests fills in the real length
    # of the encoded form itself. No Cookie header is sent either.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'hotel.elong.com',
        'Origin': 'http://hotel.elong.com',
        'Pragma': 'no-cache',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://hotel.elong.com/beijing/',
        'User-Agent': user_agents.random,
        'X-Requested-With': 'XMLHttpRequest',
    }

    for page in range(1, 21):
        print("第" + str(page) + "页")
        data["listRequest.pageIndex"] = str(page)    # pagination
        headers['User-Agent'] = user_agents.random   # fresh UA for each page
        html = getOnePage(url)
        # Keep the parsed rows under their own name so the request form in
        # `data` is not overwritten between pages.
        rows = prasePage(html)
        writeToFile(rows)
        time.sleep(random.randint(1, 4))




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 5
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值