python爬取固定酒店评论_爬取携程上酒店评论数据

爬虫代码:

import time

import csv

import re

from urllib import request

import json

# Output CSV for the scraped reviews. It is opened in append mode ('a+') so
# rows written by earlier runs are kept; newline='' lets the csv module
# control line endings itself.
c = open(r'D:\安吉竹博园开元度假村.csv', 'a+', newline='', encoding='utf8')

# Column order of every row written through `writer`.
fieldnames = ['user', 'time', 'score', 'content']

writer = csv.DictWriter(c, fieldnames=fieldnames)

# Because the file is appended to, writing the header unconditionally would
# insert a duplicate header row on every re-run. Only write it when the file
# is still empty (position at end-of-file is 0).
c.seek(0, 2)  # 2 == os.SEEK_END
if c.tell() == 0:
    writer.writeheader()

def getResponse(url, *, page_index=2):
    """POST the hotel-review query to ``url`` and return the open response.

    The request payload (``data``) was captured from the page below
    (reportedly the mobile web page), where 26683709 is the hotel ID; the
    same ID appears in the URL of the original hotel page:

        https://m.ctrip.com/webapp/hotel/HotelDetail/dianping/26683709.html

    Original Ctrip page for the hotel:
    https://hotels.ctrip.com/hotel/26683709.html?isFull=F&masterhotelid=26683709&hcityid=659#ctm_ref=hod_sr_lst_dl_n_1_6

    Args:
        url: Endpoint that the JSON payload is POSTed to.
        page_index: Value sent in the payload's "pageIndex" field. Defaults
            to 2, the value that was previously hard-coded, so existing
            callers send exactly the same request.

    Returns:
        The response object produced by ``urllib.request.urlopen``; the
        caller is responsible for reading and closing it.
    """
    data = {
        "hotelId": 26683709,
        "pageIndex": page_index,
        "tagId": 0,
        "pageSize": 10,
        "groupTypeBitMap": 2,
        "needStatisticInfo": 0,
        "order": 0,
        "basicRoomName": "",
        "travelType": -1,
        "head": {
            "cid": "09031174312350135405",
            "ctok": "",
            "cver": "1.0",
            "lang": "01",
            "sid": "8888",
            "syscode": "09",
            "auth": "93C8AE20D20009DC90E6E10BB588DE61E67EBBC236DE15433FDDADFD95636F28",
            "extension": [],
        },
    }

    # Package the request body: json.dumps() turns the dict into a string,
    # which is then encoded to UTF-8 bytes as urllib requires for POST data.
    data = json.dumps(data).encode(encoding='utf-8')

    header_dict = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
        "Content-Type": "application/json",
    }

    # Supplying `data` makes urllib issue a POST instead of a GET.
    url_request = request.Request(url=url, data=data, headers=header_dict)
    url_response = request.urlopen(url_request)
    return url_response

datas = []#存放生成的多个请求头

for j in range(6):

#使用"pageIndex":str(j + 1)进行翻页

data1 = {"hotelId": 26683709, "pageIndex":str(j + 1), "tagId": 0, "pageSize": 10, "groupTypeBitMap": 2,"needStatisticInfo": 0, "order": 0, "basicRoomName

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值