import pymysql


class MysqlPipeline(object):
    """Scrapy item pipeline that persists items into a MySQL database.

    An item opts in to persistence by exposing a ``table_name``
    attribute naming the destination table; items without one are
    passed through untouched.
    """

    def __init__(self):
        # Connection and cursor are created lazily in open_spider so the
        # pipeline can be instantiated without a reachable database.
        self.conn = None
        self.cur = None

    def open_spider(self, spider):
        """Open the MySQL connection and cursor when the spider starts."""
        self.conn = pymysql.connect(
            host='127.0.0.1',
            port=3306,
            user='root',
            password='root',
            db='xpc1704',
            charset='utf8mb4'
        )
        self.cur = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert ``item`` as one row into its declared table.

        Row values are passed as query parameters (``%s`` placeholders)
        so they are escaped by the driver. Table and column identifiers
        cannot be parameterized in MySQL, so they are interpolated
        directly but backtick-quoted.

        Returns the item unchanged so later pipelines can process it.
        Raises pymysql.MySQLError if the insert fails (after rolling
        back, so the connection stays usable for subsequent items).
        """
        if not hasattr(item, 'table_name'):
            return item

        cols, values = zip(*item.items())
        sql = "INSERT INTO `%s` (%s) VALUES (%s)" % (
            item.table_name,
            ','.join('`%s`' % col for col in cols),
            ','.join(['%s'] * len(values)),
        )
        spider.logger.debug("%s %s", sql, values)
        try:
            self.cur.execute(sql, values)
            self.conn.commit()
        except pymysql.MySQLError:
            # Abort the failed transaction; otherwise every following
            # item would execute inside a broken transaction.
            self.conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor and connection when the spider closes."""
        # Guard against open_spider never having run (e.g. connect failed).
        if self.cur is not None:
            self.cur.close()
        if self.conn is not None:
            self.conn.close()
爬虫数据保存到数据库通用格式--------pipelines.py 下载管道
最新推荐文章于 2022-09-25 15:00:17 发布