Scrapy 通用的 SQL 插入(存在则更新)写法
本次示例使用 Scrapy 爬取豆瓣电影 Top 250(https://movie.douban.com/top250)。
item.py
# Item fields collected by the spider. Each field name is used verbatim as a
# column name by the MySQL pipeline, so it must match the table's column name.
# NOTE(review): these presumably sit inside a scrapy.Item subclass whose
# header is not shown here -- confirm.
title=scrapy.Field()
comment = scrapy.Field()
link = scrapy.Field()
quote = scrapy.Field()
rank = scrapy.Field()
item 中的字段名称必须与数据库表中对应的列名完全相同,字段的先后顺序可以不一致。
重点来了:
import pymysql
# Module-level MySQL connection, opened once when this module is imported.
conn = pymysql.connect(
    host='localhost',
    user='root',
    # `password` / `database` replace the deprecated `passwd` / `db` aliases.
    password='数据库密码',
    port=3306,
    database='数据库名称',
    # utf8mb4 is MySQL's full UTF-8; plain 'utf8' (utf8mb3) rejects 4-byte
    # characters such as emoji.
    charset='utf8mb4',
)
# Shared cursor obtained from the connection above.
cursor = conn.cursor()
class WannnegSqlPipeline(object):
    """Scrapy item pipeline that upserts every item into one MySQL table.

    Item field names are used verbatim as column names, so the same pipeline
    works for any item whose fields match the columns of the target table.
    """

    def __init__(self, table_name='ceshisql', connection=None):
        """Configure the target table and, optionally, the DB connection.

        Args:
            table_name: Table that receives the rows. Defaults to the
                previously hard-coded ``'ceshisql'``.
            connection: DB-API connection to write through. When ``None``,
                the module-level ``conn`` is used.
        """
        # Name of the table rows are written to.
        self.table_name = table_name
        self._connection = connection

    @staticmethod
    def _quote_identifier(name):
        """Return *name* wrapped in backticks, doubling embedded backticks."""
        return '`{}`'.format(str(name).replace('`', '``'))

    def process_item(self, item, spider):
        """Insert *item* as a row, updating the row on a duplicate key.

        Args:
            item: Mapping-like object exposing ``items()``; keys are column
                names and values are the column values.
            spider: The spider that produced the item (unused).

        Returns:
            The same *item*, so later pipelines can keep processing it.

        Raises:
            Exception: Whatever the driver raises from ``execute`` or
                ``commit``; the transaction is rolled back before re-raising.
        """
        fields = list(item.items())
        if not fields:
            # Nothing to store; an empty column list would be invalid SQL.
            return item

        columns, values = zip(*fields)
        # Quote every identifier: unquoted reserved words such as `rank`
        # make the statement a syntax error on MySQL 8.0.
        quoted = [self._quote_identifier(column) for column in columns]
        sql = "INSERT INTO {} ({}) VALUES ({}) ON DUPLICATE KEY UPDATE {}".format(
            self._quote_identifier(self.table_name),
            ','.join(quoted),
            ','.join(['%s'] * len(values)),
            ','.join('{}=%s'.format(column) for column in quoted),
        )

        db = self._connection if self._connection is not None else conn
        cur = db.cursor()
        try:
            # Values are bound twice: once for VALUES, once for UPDATE.
            cur.execute(sql, values * 2)
            db.commit()
        except Exception:
            # Do not leave a half-finished transaction on the connection.
            db.rollback()
            raise
        finally:
            cur.close()
        return item