刚刚学了一下,还不是很明白。随手记录。
在 pipelines.py 文件中,将爬取到的数据写入 JSON 文件:
class DoubanmoviePipelin2json(object):
    """Item pipeline that streams scraped items into ``doubantop250.json``.

    The file is written incrementally as one JSON array: ``[`` when the
    spider opens, one object per item separated by ``,\n``, and ``]`` when
    the spider closes.
    """

    def open_spider(self, spider):
        """Open the output file and start the JSON array.

        This is a framework hook looked up by name, so it must be spelled
        exactly ``open_spider``; a differently named method is never called.
        """
        self.jsonfile = open("doubantop250.json", "w", encoding="utf-8")
        self.jsonfile.write("[\n")
        # No record has been written yet, so the next one takes no separator.
        self.first = True

    def process_item(self, item, spider):
        """Serialize one item and append it to the array.

        Returns the item unchanged so later pipelines can keep processing it.
        """
        # ensure_ascii=False keeps non-ASCII text (e.g. Chinese titles)
        # readable in the file instead of \uXXXX escapes.
        record = json.dumps(dict(item), ensure_ascii=False)
        if self.first:
            self.first = False
        else:
            # Separator goes *before* every record except the first, so the
            # array never ends with a trailing comma.
            record = ",\n" + record
        self.jsonfile.write(record)
        return item

    def close_spider(self, spider):
        """Terminate the JSON array and close the file."""
        self.jsonfile.write("\n]")
        self.jsonfile.close()
将数据写入 xls 文件中:
class DoubanmoviePipline2xls(object):
    """Item pipeline that writes scraped items into ``doubantop250.xls``.

    Row 0 holds the column headers; each processed item fills the next row,
    and the workbook is saved to disk when the spider closes.
    """

    def open_spider(self, spider):
        """Create the workbook and sheet, then write the header row."""
        self.workbook = xlwt.Workbook(encoding="utf-8")
        self.worksheet = self.workbook.add_sheet("doubantop250")
        header = ["电影排名", "电影名"]
        for col_index, title in enumerate(header):
            self.worksheet.write(0, col_index, title)
        # Row 0 is taken by the header, so data starts at row 1.
        self.rows = 1

    def process_item(self, item, spider):
        """Write one item's values across the current row, then advance.

        Returns the item unchanged so later pipelines can keep processing it.
        """
        # Values are written in the item's own key iteration order.
        # NOTE(review): assumes that order matches the header columns
        # (rank, then title) -- confirm against the spider's item fields.
        for col_index, key in enumerate(item):
            self.worksheet.write(self.rows, col_index, item[key])
        self.rows += 1
        return item

    def close_spider(self, spider):
        """Reset the row counter and save the workbook to disk."""
        self.rows = 0
        self.workbook.save("doubantop250.xls")