1. Once the parsing code in our spider file has extracted the fields, pack them into a HongxiuItem and yield it:
item = HongxiuItem()
item['title'] = title
item['author'] = author
item['tags'] = tags
item['total_word_num'] = total_word_num
item['keep_num'] = keep_num
item['click_num'] = click_num
item['info'] = info
yield item
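For orientation, here is a minimal sketch of how this yield typically sits inside the spider's parse() method. The spider name, start URL, and every XPath expression below are placeholders assumed for illustration only; they must be adapted to the real page structure of the site:

import scrapy
from hongxiu.items import HongxiuItem

class HongxiuSpider(scrapy.Spider):
    name = 'hongxiu'
    # Placeholder start URL; replace it with the real category page.
    start_urls = ['https://www.hongxiu.com/category/']

    def parse(self, response):
        # Placeholder selectors; adjust them to the actual HTML structure.
        for book in response.xpath('//div[@class="book-info"]'):
            item = HongxiuItem()
            item['title'] = book.xpath('./h3/a/text()').get()
            item['author'] = book.xpath('./h4/a/text()').get()
            # Join the tag list into one string so it fits a single column later.
            item['tags'] = ','.join(book.xpath('./p[@class="tag"]/span/text()').getall())
            item['total_word_num'] = book.xpath('.//span[@class="total"]/text()').get()
            item['keep_num'] = book.xpath('.//span[@class="keep"]/text()').get()
            item['click_num'] = book.xpath('.//span[@class="click"]/text()').get()
            item['info'] = book.xpath('./p[@class="intro"]/text()').get()
            yield item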
2. We also need to declare the corresponding Field entries in items.py:
import scrapy

class HongxiuItem(scrapy.Item):
    title = scrapy.Field()
    author = scrapy.Field()
    tags = scrapy.Field()
    total_word_num = scrapy.Field()
    keep_num = scrapy.Field()
    click_num = scrapy.Field()
    info = scrapy.Field()
3. Next, write the pipeline code in pipelines.py:
import pymysql

class HongxiuPipeline(object):
    # open_spider() and close_spider() each run only once, when the spider
    # is opened and when it is closed.
    def open_spider(self, spider):
        self.connect = pymysql.connect(
            host='localhost',
            user='root',
            port=3306,
            passwd='123456',
            db='hongxiu',
            charset='utf8'
        )
        self.cursor = self.connect.cursor()

    def process_item(self, item, spider):
        insert_sql = "INSERT INTO hx(title, author, tags, total_word_num, keep_num, click_num, info) VALUES (%s, %s, %s, %s, %s, %s, %s)"
        self.cursor.execute(insert_sql, (
            item['title'], item['author'], item['tags'], item['total_word_num'],
            item['keep_num'], item['click_num'], item['info']))
        self.connect.commit()
        # Return the item so any later pipelines can still process it.
        return item

    def close_spider(self, spider):
        self.cursor.close()
        self.connect.close()
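The pipeline assumes that the hongxiu database and the hx table already exist. The tutorial does not show how they were created, so the following one-off setup script is only a sketch; the column types are assumptions chosen to match the INSERT statement above:

import pymysql

# One-off setup: create the target table. Column names mirror the INSERT
# statement in the pipeline; the column types are assumed, not taken from
# the original tutorial.
connect = pymysql.connect(host='localhost', user='root', port=3306,
                          passwd='123456', db='hongxiu', charset='utf8')
cursor = connect.cursor()
cursor.execute("""
    CREATE TABLE IF NOT EXISTS hx (
        id INT PRIMARY KEY AUTO_INCREMENT,
        title VARCHAR(255),
        author VARCHAR(255),
        tags VARCHAR(255),
        total_word_num VARCHAR(50),
        keep_num VARCHAR(50),
        click_num VARCHAR(50),
        info TEXT
    ) DEFAULT CHARSET=utf8
""")
connect.commit()
cursor.close()
connect.close()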
4. Finally, enable the pipeline by uncommenting ITEM_PIPELINES in settings.py:
ITEM_PIPELINES = {
    'hongxiu.pipelines.HongxiuPipeline': 300,
}
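The value 300 is the pipeline's priority (conventionally in the 0-1000 range): when several pipelines are enabled, the ones with lower numbers run first.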