爬虫记录帖:携程某个城市的全部酒店数据

import time
import pandas as pd
from DrissionPage import ChromiumPage
page = ChromiumPage()
# Column order of every CSV row (no header row is written to the file):
# hotel name, price, score, review info, id, tags, lng, lat, address, area, poi
total = 10192  # total number of hotels in this city; can be read from the search API
OUTPUT_CSV = "/Users/xiaoming/Desktop/learning/python/data/hotel_xiecheng.csv"
MISSING = 'none'  # placeholder stored when an optional field is absent


def _dig(record, *keys):
    """Return ``record[k1][k2]...`` or the ``'none'`` placeholder.

    Only lookup failures (missing key, non-subscriptable intermediate value,
    bad index) are treated as "field absent"; anything else, including
    KeyboardInterrupt, propagates. A value that is present but ``None`` is
    returned as-is.
    """
    try:
        for key in keys:
            record = record[key]
    except (KeyError, TypeError, IndexError):
        return MISSING
    return record


def _hotel_row(hotel):
    """Flatten one hotel entry of the search response into a CSV row.

    ``hotelName`` and ``hotelId`` are required: a missing ``base`` section or
    key raises, as in the original script. Every other field falls back to
    the ``'none'`` placeholder.
    """
    base = hotel['base']
    return [
        base['hotelName'],                      # name
        _dig(hotel, 'money', 'price'),          # price
        _dig(hotel, 'score', 'number'),         # score
        _dig(hotel, 'comment', 'content'),      # review info (labelled "review count" by the author)
        base['hotelId'],                        # id
        _dig(hotel, 'base', 'tags'),
        _dig(hotel, 'position', 'lng'),
        _dig(hotel, 'position', 'lat'),
        _dig(hotel, 'position', 'address'),
        _dig(hotel, 'position', 'area'),
        _dig(hotel, 'position', 'poi'),
    ]


def _click_next_page():
    """Click the ``.btn-box`` pager element, retrying once after 10 seconds.

    NOTE(review): ``click.multi()`` is kept from the original; confirm that a
    multi-click (rather than a single ``click()``) is really intended.
    """
    try:
        page.ele('css:.btn-box').click.multi()
    except Exception:
        # Not a bare ``except``: Ctrl-C must still be able to stop the crawl.
        time.sleep(10)
        page.ele('css:.btn-box').click.multi()


# Start capturing the search API responses before the page is opened so the
# very first batch is not missed.
page.listen.start('json/HotelSearch')
page.get('https://hotels.ctrip.com/hotels/list?countryId=1&city=17&provinceId=0&checkin=2024/10/08&checkout=2024/10/09&optionId=17&optionType=City&directSearch=0&display=%E6%9D%AD%E5%B7%9E&crn=1&adult=1&children=0&searchBoxArg=t&travelPurpose=0&ctm_ref=ix_sb_dl&domestic=1&')
count = 0
page_ = 1
while count < total:
    print(f'正在采集第{page_}页数据')
    if page_ > 4:
        # From the fifth batch on, the next batch is requested by clicking
        # the pager button instead of scrolling.
        time.sleep(2)
        _click_next_page()

    response = page.listen.wait()
    json_data = response.response.body

    hotel_list = json_data['Response']['hotelList']['list']
    if not hotel_list:
        # An empty batch never advances ``count``; stop here instead of
        # looping forever when the site has fewer hotels than ``total``.
        break

    data = [_hotel_row(hotel) for hotel in hotel_list]
    df = pd.DataFrame(data)
    # Append each batch immediately so an interrupted run keeps what it has.
    df.to_csv(OUTPUT_CSV, index=False, header=False, mode='a')
    if page_ < 5:
        # The first batches are triggered by scrolling to the page bottom.
        page.scroll.to_bottom()
    page_ += 1
    count += len(hotel_list)


评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值