1、在settings.py中定义相关变量
# MySQL connection settings consumed by MaoyanMysqlPipeline via
# `from .settings import *` in pipelines.py.
MYSQL_HOST = '127.0.0.1'
MYSQL_USER = 'root'
MYSQL_PWD = '123456'
MYSQL_DB = 'maoyandb'
MYSQL_CHAR = 'utf8'
2、pipelines.py中新建管道类,并导入settings模块。管道类的三个钩子方法如下:

    def open_spider(self, spider):
        # 爬虫开始时执行1次,用于建立数据库连接
        ...

    def process_item(self, item, spider):
        # 每抓取到一个item调用1次,用于处理数据
        ...

    def close_spider(self, spider):
        # 爬虫结束时执行1次,用于断开数据库连接
        ...
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
class MaoyanPipeline(object):
    """Debugging pipeline: echoes each scraped film's fields to stdout."""

    def process_item(self, item, spider):
        # Print the three captured fields in their original order.
        for field in ('name', 'star', 'time'):
            print(item[field])
        # Hand the item on to the next pipeline unchanged.
        return item
import pymysql
from .settings import *
# MySQL pipeline: persists scraped film items into the `filmtab` table.
class MaoyanMysqlPipeline(object):
    """Store each scraped film (name, star, time) in MySQL.

    Connection parameters (MYSQL_HOST, MYSQL_USER, MYSQL_PWD, MYSQL_DB,
    MYSQL_CHAR) come from settings.py via the module-level star import.
    """

    def open_spider(self, spider):
        # Runs once when the spider starts: open the DB connection.
        self.db = pymysql.connect(
            host=MYSQL_HOST,
            user=MYSQL_USER,
            password=MYSQL_PWD,
            database=MYSQL_DB,
            charset=MYSQL_CHAR,
        )
        self.cursor = self.db.cursor()
        print("我是open_spider函数")

    def process_item(self, item, spider):
        # Parameterized query — values are never interpolated into the SQL
        # string, so the driver handles quoting/escaping.
        ins = 'insert into filmtab values(%s,%s,%s)'
        film_list = [
            item["name"], item["star"], item["time"]
        ]
        try:
            self.cursor.execute(ins, film_list)
            self.db.commit()
        except Exception:
            # Roll back the failed transaction so one bad item does not
            # poison the connection for subsequent items, then re-raise.
            self.db.rollback()
            raise
        # Must return the item so the next pipeline can keep processing it.
        return item

    def close_spider(self, spider):
        # Runs once when the spider finishes: release DB resources.
        self.cursor.close()
        self.db.close()
        print("我是close_spider函数")
3、settings.py中添加此管道
ITEM_PIPELINES = {'Maoyan.pipelines.MaoyanMysqlPipeline': 200}  # 键为管道类的完整导入路径(按项目名调整),值越小优先级越高
image.png
# 注意:process_item() 函数中一定要 return item —— 此返回值会交给下一个管道继续处理item数据