【python 爬虫】携程,去哪儿评论爬虫

1、去哪儿

# -*- coding:utf-8 -*-

import re
import json
import requests
import pandas as pd
# Scrape visitor comments for Qunar sight 5759 page by page and save them
# to a CSV file.
#
# `date` and `content` are parallel lists: date[k] belongs to content[k].
date = []
content = []

for i in range(1, 1000):
    print("正在抓取第"+str(i)+"页")
    url = "https://touch.piao.qunar.com/touch/queryCommentsAndTravelTips.json?type=mp&pageSize=10&fromType=SIGHT&pageNum="+str(i)+"&sightId=5759&tagType=0"
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        html = requests.get(url, timeout=10).text
        commentList = json.loads(html)['data']['commentList']
        if not commentList:
            # Presumably an empty page means we are past the last page of
            # comments, so stop instead of requesting the remaining pages.
            break
        for each in commentList:
            # Strip the decorative "♬ " marker and store the cleaned text
            # (previously the cleaned value was computed but then discarded).
            txt = re.sub("♬ ", "", each['content'])
            date1 = each['date']
            # Both fields are read before either append so the two lists
            # can never get out of step if a key is missing.
            date.append(date1)
            content.append(txt)
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        # Best effort: a failed request, a non-JSON body, or an unexpected
        # payload shape only costs this page. Report it instead of hiding it,
        # and let Ctrl-C / real programming errors propagate.
        print("skipping page "+str(i)+": "+repr(err))


# NOTE(review): the 'date:' header carries a trailing colon, which looks like
# a typo. It is kept unchanged so existing readers of the CSV are not broken.
result = pd.DataFrame({'date:': date, 'content': content})

result.to_csv('F:/qunaer.csv', index=False)

2、携程

# -*- coding:utf-8 -*-
import re
import requests
import json

import pandas as pd

# Scrape visitor comments from Ctrip (BusinessId 20485) page by page and save
# them to a CSV file.
#
# `date` and `comment` are parallel lists: date[k] belongs to comment[k].
date = []
comment = []

# The endpoint and the request head do not depend on the page number, so they
# are built once instead of on every iteration.
url = "https://m.ctrip.com/restapi/soa2/10491/json/GetCommentListAndHotTagList?_fxpcqlniredt=09031014411533277785"

head = {
    "auth": "null",
    "cid": "09031014411533277785",
    "ctok": "",
    "cver": "1.0",
    "lang": "01",
    "sid": "8888",
    "syscode": "09"
}

for i in range(1, 130):
    print('正在抓取第'+str(i)+"页")

    # Per-page query; only PageIndex changes between requests.
    data1 = {
        "BusinessId": "20485",
        "BusinessType": "11",
        "ChannelType": "7",
        "CommentTagId": "0",
        "ImageFilter": "false",
        "PageIndex": i,
        "PageSize": "10",
        "PoiId": "0",
        "SortType": "3",
        "StarType": "0",
        "TouristType": "0",
        "VideoImageHeight": "392",
        "VideoImageWidth": "700"
    }

    data = {
        "CommentResultInfoEntity": data1,
        "head": head
    }

    try:
        # The body is sent as a JSON string, exactly as before. A timeout
        # keeps one stalled connection from hanging the whole run.
        html = requests.post(url, data=json.dumps(data), timeout=10).text
        CommentInfo = json.loads(html)['CommentResult']['CommentInfo']
        if not CommentInfo:
            # Presumably an empty page means we are past the last page of
            # comments, so stop instead of requesting the remaining pages.
            break
        for each in CommentInfo:
            # Strip the decorative "♬ " marker from the comment text.
            Content = re.sub("♬ ", "", each['Content'])
            print(Content)
            # Dates are stored at year-month granularity.
            date1 = str(each['PlayYear'])+'-'+str(each['PlayMonth'])
            # Both fields are computed before either append so the two lists
            # can never get out of step if a key is missing.
            comment.append(Content)
            date.append(date1)
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        # Best effort: a failed request, a non-JSON body, or an unexpected
        # payload shape only costs this page. Report it instead of hiding it,
        # and let Ctrl-C / real programming errors propagate.
        print("skipping page "+str(i)+": "+repr(err))


# NOTE(review): the 'date:' header carries a trailing colon, which looks like
# a typo. It is kept unchanged so existing readers of the CSV are not broken.
result = pd.DataFrame({'date:': date, 'content': comment})

result.to_csv('F:/xiecheng.csv', index=False)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

东华果汁哥

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值