import requests
import json
import csv
def getOnePageData(page, timeout=10):
    """Fetch one page of the Douban movie chart and return it as a list.

    Args:
        page: Value sent as the ``start`` query parameter (record offset).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body (the endpoint answers with a JSON array, so a
        list of dicts), or an empty list when the response status is not 200.
        Returning ``[]`` instead of ``None`` lets callers concatenate pages
        without a special case.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        json.JSONDecodeError: If a 200 response does not contain valid JSON.
    """
    start_url = "https://movie.douban.com/j/chart/top_list?"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
    }
    params = {
        "type": "24",
        "interval_id": "100:90",
        "start": page,
        "limit": 20,
    }
    response = requests.get(
        start_url, headers=headers, params=params, timeout=timeout
    )
    if response.status_code != 200:
        return []
    return json.loads(response.text)
def writeToCsv(data_all, path=r"D:\practice\douBa.csv"):
    """Append the title, score and types of every film record to a CSV file.

    Args:
        data_all: Iterable of dicts, each providing the keys ``"title"``,
            ``"score"`` and ``"types"``. Any number of records is accepted,
            including none.
        path: Destination file; defaults to the location the script has
            always written to. The file is opened in append mode.

    Raises:
        KeyError: If a record lacks one of the three expected keys.
        OSError: If the file cannot be opened for writing.
    """
    # newline="" is what the csv module expects; without it Windows output
    # gets an empty line after every row.
    with open(path, mode="a", encoding="utf-8", newline="") as file:
        film_list = csv.writer(file)
        film_list.writerows(
            [data["title"], data["score"], data["types"]] for data in data_all
        )
if __name__ == '__main__':
    # Collect the records of every requested page, then write them in one go.
    data_all = []
    # NOTE(review): the offsets are 20, 40, 60, 80, so the page at start=0 is
    # never requested -- confirm that skipping it is intended.
    for page in range(20, 81, 20):
        # A failed request may produce no list; treat it as an empty page
        # rather than crashing the whole run.
        data_all.extend(getOnePageData(page) or [])
    writeToCsv(data_all)
1. 这次获取的 JSON 数据和之前的不一样:之前遇到的 JSON 数据是一个对象,以 { } 开始和结束,所以在获取数据时,
content0 = content_dict["mods"]
直接采用字典的方法就可以。
但这次的json数据是一个数组,以[ ]开始和结束
for i in range(80):
data =data_all[i]
title = data["title"]
所以需要先取出列表里的每一项,再采用字典的方法取值。
2. 翻页处理时,可以把下面的写法当作模板:
for page in range(20, 81, 20):
data_all = data_all + getOnePageData(page)
3.ValueError: I/O operation on closed file.
出现这个报错,是因为文件对象只在 with 代码块内处于打开状态,代码块结束后 I/O 流会自动关闭;所以文件读写语句都要写在 with 代码块内部。