爬虫记录帖：携程某个城市的全部酒店数据

最新推荐文章于 2025-03-08 23:22:24 发布

浮浪潮

最新推荐文章于 2025-03-08 23:22:24 发布

阅读量1.5k

点赞数 6

文章标签：爬虫

本文链接：https://blog.csdn.net/weixin_43482007/article/details/142851491

版权

import time
import pandas as pd
from DrissionPage import ChromiumPage
# Column order of each row the scraping loop writes to the CSV:
# ['hotel', 'price', 'score', 'review count', 'id', 'tags', 'lng', 'lat', 'address', 'area', 'poi']

# Total number of hotels in this city; the value can be read from the search API response.
total = 10192
# Running state for the scraping loop: hotels collected so far, and the
# 1-based index of the result page currently being processed.
count, page_ = 0, 1

page = ChromiumPage()
# The listener is started before navigating; the loop's first
# `page.listen.wait()` presumably picks up the response triggered by this
# initial page load.
page.listen.start('json/HotelSearch')
# Hotel list search; `display=%E6%9D%AD%E5%B7%9E` is the URL-encoded city
# name "杭州" (Hangzhou). Check-in/check-out dates are fixed in the query.
page.get(
    'https://hotels.ctrip.com/hotels/list'
    '?countryId=1&city=17&provinceId=0'
    '&checkin=2024/10/08&checkout=2024/10/09'
    '&optionId=17&optionType=City&directSearch=0'
    '&display=%E6%9D%AD%E5%B7%9E'
    '&crn=1&adult=1&children=0'
    '&searchBoxArg=t&travelPurpose=0'
    '&ctm_ref=ix_sb_dl&domestic=1&'
)
def _hotel_field(hotel, section, key, default='none'):
    """Return ``hotel[section][key]``, or *default* when it cannot be read.

    Only lookup failures are treated as "missing": an absent key
    (``KeyError``) or a section that is not subscriptable, such as ``None``
    (``TypeError``). Any other exception propagates instead of being
    silently swallowed.
    """
    try:
        return hotel[section][key]
    except (KeyError, TypeError):
        return default


def _hotel_row(hotel):
    """Flatten one hotel entry of the search response into a CSV row.

    Column order: name, price, score, review count, id, tags, lng, lat,
    address, area, poi. Name and id are mandatory (a missing one raises
    ``KeyError``); every other field falls back to the string ``'none'``.
    """
    hotel_name = hotel['base']['hotelName']
    hotel_id = hotel['base']['hotelId']
    return [
        hotel_name,
        _hotel_field(hotel, 'money', 'price'),
        _hotel_field(hotel, 'score', 'number'),
        _hotel_field(hotel, 'comment', 'content'),  # review count
        hotel_id,
        _hotel_field(hotel, 'base', 'tags'),
        _hotel_field(hotel, 'position', 'lng'),
        _hotel_field(hotel, 'position', 'lat'),
        _hotel_field(hotel, 'position', 'address'),
        _hotel_field(hotel, 'position', 'area'),
        _hotel_field(hotel, 'position', 'poi'),
    ]


def _click_next_page():
    """Click the ``.btn-box`` element, retrying once after a 10 s pause.

    ``except Exception`` (rather than a bare ``except``) keeps Ctrl-C and
    interpreter shutdown from being turned into a sleep-and-retry.
    A failure of the second attempt propagates to the caller.
    """
    try:
        page.ele('css:.btn-box').click.multi()
    except Exception:
        time.sleep(10)
        page.ele('css:.btn-box').click.multi()


# Collect result pages until `total` hotels have been seen. Each captured
# 'json/HotelSearch' response is flattened and appended to the CSV right
# away, so rows already written survive a later crash.
while count < total:
    print(f'正在采集第{page_}页数据')
    # From the 5th page on, the next batch is requested by clicking the
    # `.btn-box` element; earlier pages are requested by scrolling (below).
    if page_ > 4:
        time.sleep(2)
        _click_next_page()

    # Block until the listener captures the next search response.
    response = page.listen.wait()
    json_data = response.response.body

    hotel_list = json_data['Response']['hotelList']['list']
    data = [_hotel_row(hotel) for hotel in hotel_list]

    # Append mode, no header: the file accumulates rows across pages and runs.
    pd.DataFrame(data).to_csv(
        "/Users/xiaoming/Desktop/learning/python/data/hotel_xiecheng.csv",
        index=False,
        header=False,
        mode='a',
    )

    # For the first four pages, scrolling to the bottom is what requests
    # the next batch of results.
    if page_ < 5:
        page.scroll.to_bottom()
    page_ += 1
    count += len(hotel_list)