首先，在 MySQL 中创建好数据库和表
image
然后编写各个模块
items.py（Scrapy 项目中定义 Item 的默认文件名）
import scrapy
class JianliItem(scrapy.Item):
    """Item holding one scraped resume (简历) record.

    Fields mirror the two columns of the MySQL table `jl`
    that the pipeline inserts into.
    """

    # Display name of the resume entry.
    name = scrapy.Field()
    # Detail/download URL of the resume entry.
    url = scrapy.Field()
pipelines.py（Scrapy 项目中编写 Item Pipeline 的默认文件名）
import pymysql #导入数据库的类
class JianliPipeline(object):
    """Scrapy item pipeline that persists scraped items into MySQL.

    A single connection is opened when the spider starts
    (``open_spider``) and reused for every item.
    """

    # Shared connection/cursor; set in open_spider, used by process_item.
    conn = None
    cursor = None

    def open_spider(self, spider):
        """Open one MySQL connection when the spider starts.

        Called once by Scrapy at spider startup; ``spider`` is the
        running spider instance (unused here).
        """
        print('开始爬虫')
        # Connect to the local `jianli` database.
        # NOTE(review): no charset is specified — Chinese text may be
        # stored garbled depending on the server default; consider
        # passing charset='utf8mb4'. TODO confirm server configuration.
        self.conn = pymysql.Connect(
            host='127.0.0.1',
            port=3306,
            user='root',
            password='',
            db='jianli',
        )
def process_item(self, item, spider): #编写向数据库中存储数据的相关代码
self.cursor = self.conn.cursor() #1.链接数据库
sql = 'insert into jl values("%s","%s")'%(item['name'],item['url']) #2.执行sql语句
try: #执行事务
sel