爬取去哪儿网故宫景区评论-CSDN博客

本文链接：https://blog.csdn.net/weixin_45041762/article/details/142816666

import csv
import json
import requests
import time
 
 
def comment(sightId, page, *, max_retries=5, retry_wait=1, page_wait=5):
    """Fetch one page of reviews from Qunar and return them as rows.

    Args:
        sightId: Integer substituted into the ``spuId`` query parameter.
        page: Integer substituted into the ``pageNum`` query parameter.
        max_retries: How many times to re-request when the reply is
            rejected by the sanity check before giving up.
        retry_wait: Seconds to wait between those re-requests.
        page_wait: Seconds to sleep after a page has been parsed, to
            throttle consecutive calls.

    Returns:
        A list with one ``[author, date, score, image count, content]``
        list per review on the page.

    Raises:
        RuntimeError: If the reply is still rejected after ``max_retries``
            re-requests.
        Exception: Network errors from ``requests`` and ``KeyError`` /
            ``ValueError`` from an unexpected payload propagate unchanged.
    """
    url = (
        "https://piao.qunar.com/daytrip/getCommentInfo.json"
        "?spuId=%d&pageNum=%d&pageSize=10&tagType=0"
    ) % (sightId, page)

    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
    }

    res = requests.get(url=url, headers=headers, timeout=5)

    # Sanity check on the reply: a body whose second character is "r" is
    # treated as a bad answer and requested again.
    # NOTE(review): the exact shape of that bad reply is not visible in this
    # file - confirm against a captured response.
    # Slicing (instead of indexing) keeps an empty or one-character body from
    # raising IndexError, and the attempt counter keeps a persistently bad
    # reply from turning into an endless, un-throttled request loop.
    attempts = 0
    while res.text[1:2] == "r":
        if attempts >= max_retries:
            raise RuntimeError(
                "server kept returning an invalid reply for page %d" % page
            )
        attempts += 1
        time.sleep(retry_wait)
        res = requests.get(url=url, headers=headers, timeout=5)

    results = res.json()["data"]
    comments = []
    for result in results["commentList"]:
        commentList = [
            result["author"],     # reviewer id
            result["date"],       # review date
            result["score"],      # overall score
            len(result["imgs"]),  # number of attached images
            result["content"],    # review text
        ]
        comments.append(commentList)
        print(commentList)

    # Pause before returning so back-to-back page fetches are spaced out.
    time.sleep(page_wait)
    return comments
 
 
def storage_as_json(file_path, reviewsList):
    """Write the reviews to ``file_path`` as UTF-8 JSON.

    The rows are stored under the single top-level key "评论列表", with
    non-ASCII characters written verbatim and a 4-space indent.
    """
    with open(file_path, 'w', encoding='utf-8') as handle:
        json.dump(
            {"评论列表": reviewsList},
            handle,
            ensure_ascii=False,
            indent=4,
        )
 
 
def load_data_from_json(file_path):
    """Read a UTF-8 JSON file and return the value stored under "评论列表".

    Raises ``KeyError`` if the top-level object has no such key.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)
    return payload["评论列表"]
 
 
def storage_as_csv(file_path, reviewsList):
    """Write the reviews to ``file_path`` as a UTF-8 CSV file.

    The first line is a fixed header row; each element of ``reviewsList``
    is then written as one CSV row.
    """
    column_titles = ['评论者ID', '评论日期', '总评分', '图片数量', '文本评论']

    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(column_titles)
        for row in reviewsList:
            out.writerow(row)
 
 
if __name__ == "__main__":
    # Id passed to the review endpoint as spuId, and how many pages to fetch.
    sightId = 3300887
    page = 50

    comments = []
    for i in range(1, page + 1):
        try:
            comments += comment(sightId, i)
        except Exception as exc:
            # Best effort: one failing page must not abort the whole crawl,
            # but the failure is reported instead of silently dropped.
            # Catching Exception (not a bare except) keeps Ctrl-C and
            # SystemExit able to stop the script.
            print("page %d skipped: %r" % (i, exc))

    # Save as JSON, then build the CSV from the data read back from that file.
    storage_as_json('comment.json', comments)
    loaded_data = load_data_from_json('comment.json')
    storage_as_csv('comment.csv', loaded_data)