本文代码在系列八代码的基础上修改
1.安装mysqlclient
命令行执行:pip install mysqlclient
2.配置数据库连接信息
在settings.py文件中加入数据库连接配置。属性名没有硬性规定,可以自行命名,但必须与管道中读取的名称保持一致(下文代码读取的是MYSQL_DB_NAME、MYSQL_HOST、MYSQL_USER、MYSQL_PASSWORD)
3.获取数据库配置,连接数据库
4.执行数据库操作
5.提交和关闭操作
6.加入到数据清洗的管道
完整代码
class MySQLPipeline(object):
    """Item pipeline that writes each scraped item into the MySQL table ``hot``.

    The connection is opened in ``open_spider``, one row is inserted per item
    in ``process_item``, and everything is committed in a single transaction
    and released in ``close_spider``.
    """

    # Parameterized statement: the driver escapes the values, so item text is
    # never interpolated into the SQL string.
    INSERT_SQL = "insert into hot(name,author,type,form) values(%s,%s,%s,%s)"

    # Defaults so close_spider() stays safe even if open_spider() never ran
    # or failed before the connection/cursor were created.
    db_conn = None
    db_cursor = None

    def open_spider(self, spider):
        """Read the connection settings and open the connection and cursor.

        Args:
            spider: object whose ``settings.get(name, default)`` supplies the
                configuration values.
        """
        # First argument is the setting name from settings.py; the second is
        # the fallback used when that setting is not defined.
        db = spider.settings.get("MYSQL_DB_NAME", "qidian")
        host = spider.settings.get("MYSQL_HOST", "localhost")
        user = spider.settings.get("MYSQL_USER", "root")
        pwd = spider.settings.get("MYSQL_PASSWORD", "root")
        # Connect to the database.
        self.db_conn = MySQLdb.connect(
            db=db, host=host, user=user, password=pwd, charset="utf8"
        )
        # Cursor reused for every insert issued by this pipeline.
        self.db_cursor = self.db_conn.cursor()

    def process_item(self, item, spider):
        """Insert one item as a row of ``hot`` and pass the item on.

        Args:
            item: mapping providing the keys ``name``, ``author``, ``type``
                and ``form``.
            spider: unused here.

        Returns:
            The same ``item`` object, unchanged.

        Raises:
            KeyError: if ``item`` lacks one of the four keys.
        """
        values = (item['name'], item['author'], item['type'], item['form'])
        # Not committed here; the commit happens once in close_spider().
        self.db_cursor.execute(self.INSERT_SQL, values)
        return item

    def close_spider(self, spider):
        """Commit the pending inserts, then close the cursor and connection.

        The cursor and connection are closed even when the commit (or the
        cursor close) raises; the exception still propagates to the caller.
        Does nothing if no connection was ever opened.

        Args:
            spider: unused here.
        """
        if self.db_conn is None:
            return
        try:
            # Commit all rows inserted during the crawl.
            self.db_conn.commit()
        finally:
            try:
                # Close the cursor, if one was created.
                if self.db_cursor is not None:
                    self.db_cursor.close()
            finally:
                # Always release the connection.
                self.db_conn.close()