Python 3.7.0
Scrapy 1.6.0
windows 10.01
爬取一部小说 小说网址
- 设置items
import scrapy
class Novel1Item(scrapy.Item):
    """Container for the data scraped for one piece of a novel.

    Every attribute is declared as a ``scrapy.Field()``, which registers it
    as a key that may be assigned and read on instances of this item.
    """

    # Presumably the title of the novel -- confirm against the spider.
    title = scrapy.Field()
    # Presumably the name of the chapter the text belongs to -- confirm.
    chapter_name = scrapy.Field()
    # Presumably the text body of that chapter -- confirm.
    content = scrapy.Field()
- 设置pipelines
import codecs
import json
class Novel1Pipeline(object):
def __init__(self):
print('starting')
self.file = codecs.open('text_novel1.json', 'w', encoding='utf-8')
def process_item(self, item, spider):
json_text = json.dumps(dict