pipelines.py
class DuoTestPipeline(object):
    """Scrapy item pipeline that writes items to MySQL asynchronously
    through a Twisted ``adbapi`` connection pool.

    NOTE(review): this module must have ``import pymysql`` and
    ``from twisted.enterprise import adbapi`` at the top of the file —
    they are not visible in this chunk; confirm they exist.
    """

    def __init__(self, db_pool):
        # Shared Twisted adbapi.ConnectionPool, built in from_settings().
        self.db_pool = db_pool

    @classmethod
    def from_settings(cls, settings):
        """Alternate constructor: build the connection pool from Scrapy settings."""
        db_params = dict(
            db=settings.get('MYSQL_DB_NAME'),
            host=settings.get('MYSQL_HOST'),
            port=settings.get('MYSQL_PORT'),
            user=settings.get('MYSQL_USER'),
            passwd=settings.get('MYSQL_PASSWORD'),
            charset=settings.get('MYSQL_CHARSET'),
            use_unicode=True,
            # DictCursor yields rows as dicts rather than tuples.
            cursorclass=pymysql.cursors.DictCursor,
        )
        # Create the asynchronous connection pool.
        db_pool = adbapi.ConnectionPool('pymysql', **db_params)
        return cls(db_pool)

    def process_item(self, item, spider):
        """Queue the item's prepared SQL for execution on the pool.

        Returns the item so that any later pipelines still receive it.
        """
        query = self.db_pool.runInteraction(self.insert_into, item)
        # If the SQL fails, Twisted calls the errback with the Failure
        # as the first argument, followed by item and spider.
        query.addErrback(self.handle_error, item, spider)
        # BUG FIX: the original returned None, which starves every
        # pipeline that runs after this one of the item.
        return item

    def insert_into(self, cursor, item):
        # item['zong'] is [sql, params] prepared by the spider;
        # parameterized execute() avoids SQL injection.
        cursor.execute(item['zong'][0], item['zong'][1])

    def handle_error(self, failure, item, spider):
        # BUG FIX: log through the spider's logger instead of print()
        # so failures appear in Scrapy's log with proper severity.
        spider.logger.error('MySQL insert failed for item %r: %s', item, failure)

    def data_list(self):
        # Empty placeholder kept for interface compatibility.
        pass
items.py
storage_type = scrapy.Field() # 存储类型 analysis_type = scrapy.Field() # 解析网站 zong = scrapy.Field()#数据汇总
settings.py
# MySQL connection settings ('**' are placeholders — fill in real values).
MYSQL_DB_NAME = '**'
MYSQL_HOST = '**'
MYSQL_USER = '**'
MYSQL_PASSWORD = '**'
# BUG FIX: was misspelled MYSQL_POST, so the pipeline's
# settings.get('MYSQL_PORT') silently returned None.
MYSQL_PORT = 3306
MYSQL_CHARSET = 'utf8mb4'
spider.py
# Prepare the parameterized INSERT and stash it on the item for the
# pipeline to execute. Column order must match the VALUES placeholders.
sql = 'insert into tiebadata(publish_name,publish_time,publish_url,publish_content,comment_content,comment_time,comment_name,keyword,app_name,run_time)VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
fields = (
    'publish_name', 'publish_time', 'publish_url', 'publish_content',
    'comment_content', 'comment_time', 'comment_name', 'keyword',
    'app_name', 'run_time',
)
data = tuple(item[name] for name in fields)
# [sql, params] pair read back by DuoTestPipeline.insert_into().
item['zong'] = [sql, data]
如有不恰当处，请指出，谢谢。