问题:我起初爬了豆瓣的一些数据下来,爬取了总共有10个界面,我代码里面的保存json文件代码是没问题的,但是一次只能保存一页,保存两页的话json文件就会变null(虽然保存失败 但是控制台能够完全输出所有内容) 求解
代码在这
import json import time from selenium import webdriver import lxml.html import requests import uesragent def parse_http_url(temp_url): #抓取数据方法 response = requests.get(temp_url,headers=uesragent.get_header()) texts=response.content.decode("utf-8") return texts def catch_moving_list(*group): #抓取网页数据方法 temp_text=group[0] count=group[1] if count !=0: all_moving_detail=group[2] else: all_moving_detail=[] html_source = temp_text metree = lxml.html.etree parser = metree.HTML(html_source) # xpath语法规范 moving_li = parser.xpath("//ol[@class='grid_view']/li") # print(moving_list) for detail_moving in moving_li: moving_text= {} moving_name=detail_moving.xpath("./div/div[@class='pic']/a/img/@alt")[0] moving_text['电影名']=moving_name moving_src=detail_moving.xpath("./div/div[@class='pic']/a/@href")[0] moving_text['电影地址'] = moving_src moving_score = detail_moving.xpath("./div/div[@class='info']/div[@class='bd']/div/span[@class='rating_num']/text()")[0] moving_text['电影评分'] = moving_score moving_comment = detail_moving.xpath("./div/div[@class='info']/div[@class='bd']/div/span[last()]/text()")[0] moving_comment_len=len(moving_comment) moving_text['电影评价人数'] = moving_comment[0:moving_comment_len-3] count+=1 all_moving_detail.append(moving_text) time.sleep(1) if count!=175: http_url = "https://movie.douban.com/top250?start="+str(count)+"&filter=" text_html=parse_http_url(http_url) catch_moving_list(text_html,count,all_moving_detail) else: print(all_moving_detail) # print(count) return all_moving_detail def save_all_moving_file(temp_text): #保存文件方法 moving_strs=json.dumps(temp_text,ensure_ascii=False,indent=3) # 打开文件 moving_file=open('all_moving_datas.json','w',encoding='utf-8') # 写入数据 moving_file.write(moving_strs) # 关闭文件 moving_file.close() pass def main(): #main方法, arr=[] num=125 http_url="https://movie.douban.com/top250?start="+str(num)+"&filter=" text_html=parse_http_url(http_url) more_moving_list=catch_moving_list(text_html,num,arr) save_all_moving_file(more_moving_list) pass if __name__=='__main__': main();