my_exporters.py
# 以 Excel（.xls）格式导出数据的 Exporter
from scrapy.exporters import BaseItemExporter
import xlwt
class ExcelItemExporter(BaseItemExporter):
    """Item exporter that collects items into an xlwt workbook.

    Each exported item becomes one row of a single sheet named 'scrapy';
    the workbook is written to ``file`` only when exporting finishes.
    """

    def __init__(self, file, **kwargs):
        """Set up an empty workbook that will later be saved to *file*.

        Args:
            file: Destination handed to ``Workbook.save`` in
                ``finish_exporting``.
            **kwargs: Exporter options, forwarded as a dict to
                ``self._configure`` (presumably inherited from
                BaseItemExporter -- verify against the Scrapy version in use).
        """
        # Option handling runs first so a rejected option surfaces before
        # any workbook state is created.
        self._configure(kwargs)
        self.file = file
        self.wbook = xlwt.Workbook()
        self.wsheet = self.wbook.add_sheet('scrapy')
        # Index of the next row to be written.
        self.row = 0

    def finish_exporting(self):
        """Save the accumulated workbook to ``self.file``."""
        self.wbook.save(self.file)

    def export_item(self, item):
        """Write the serialized field values of *item* into the next row.

        Only the value of each (name, value) pair is written, one value per
        column in iteration order; field names are not emitted.
        """
        pairs = self._get_serialized_fields(item)
        for column, (_name, value) in enumerate(pairs):
            self.wsheet.write(self.row, column, value)
        # Advance once per item, after all of its columns are written.
        self.row += 1
settings.py
# Register an additional export format: 'xls' maps to the ExcelItemExporter
# class by its dotted import path.
FEED_EXPORTERS = {
    'xls': 'example.my_exporters.ExcelItemExporter',
}
官方文档
https://doc.scrapy.org/en/latest/topics/feed-exports.html
默认导出格式代码
scrapy/exporters.py
默认设置
scrapy/settings/default_settings.py
# Default mapping of feed format name -> dotted path of the exporter class.
# 'jsonlines' and 'jl' both point at the same JSON Lines exporter.
FEED_EXPORTERS_BASE = {
    'json': 'scrapy.exporters.JsonItemExporter',
    'jsonlines': 'scrapy.exporters.JsonLinesItemExporter',
    'jl': 'scrapy.exporters.JsonLinesItemExporter',
    'csv': 'scrapy.exporters.CsvItemExporter',
    'xml': 'scrapy.exporters.XmlItemExporter',
    'marshal': 'scrapy.exporters.MarshalItemExporter',
    'pickle': 'scrapy.exporters.PickleItemExporter',
}