Scrapy: storing data in MySQL synchronously and asynchronously

1. Synchronous

    import pymysql


    class MysqlPipeline(object):
        """
        Write items to MySQL with a synchronous (blocking) connection.
        """
        def __init__(self):
            self.conn = pymysql.connect(host="127.0.0.1", user="root", password="ts123456",
                                        db="art_schema", port=3306)
            self.cursor = self.conn.cursor()

        def process_item(self, item, spider):
            # Use a parameterized query so string values are quoted and escaped correctly
            insert_sql = """
                insert into jobbole_article(title, url, create_date, fav_nums)
                values (%s, %s, %s, %s)
            """
            self.cursor.execute(insert_sql, (item['title'], item['url'],
                                             item['create_date'], item['fav_nums']))
            self.conn.commit()
            return item
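For the pipeline to actually run it must be enabled in the project settings. A minimal sketch, assuming the Scrapy project module is named ArticleSpider (the project name is an assumption, not taken from the original post):

    # settings.py -- enable the synchronous pipeline
    ITEM_PIPELINES = {
        'ArticleSpider.pipelines.MysqlPipeline': 300,
    }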

2. Asynchronous

    import pymysql
    import pymysql.cursors
    from twisted.enterprise import adbapi


    class MysqlTwistedPipeline(object):
        def __init__(self, dbpool):
            self.dbpool = dbpool

        @classmethod
        def from_settings(cls, settings):
            dbparms = dict(
                host=settings['MYSQL_HOST'],
                db=settings['MYSQL_DBNAME'],
                user=settings['MYSQL_USER'],
                passwd=settings['MYSQL_PASSWORD'],
                port=settings['MYSQL_PORT'],
                charset='utf8',
                cursorclass=pymysql.cursors.DictCursor,
                use_unicode=True,
            )
            dbpool = adbapi.ConnectionPool('pymysql', **dbparms)
            return cls(dbpool)

        def process_item(self, item, spider):
            """
            Use Twisted to turn the MySQL insert into an asynchronous operation,
            so writing to MySQL does not block the crawl.
            """
            query = self.dbpool.runInteraction(self.do_insert, item)
            # Handle exceptions raised by the asynchronous insert
            query.addErrback(self.handle_error)
            # Return the item so any later pipelines still receive it
            return item

        def handle_error(self, failure):
            # Report failures from the asynchronous insert
            print(failure)

        def do_insert(self, cursor, item):
            insert_sql = """
                insert into jobbole_article(title, url, create_date, fav_nums, url_object_id,
                    front_image_url, front_image_path, praise_nums, comment_nums, tags, content)
                values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            cursor.execute(insert_sql, (item['title'], item['url'], item['create_date'],
                                        item['fav_nums'], item['url_object_id'],
                                        item['front_image_url'], item['front_image_path'],
                                        item['praise_nums'], item['comment_nums'],
                                        item['tags'], item['content']))
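from_settings() pulls the connection parameters out of the Scrapy settings, so the corresponding keys must exist in settings.py. A minimal sketch; the concrete values are assumptions mirroring the synchronous example above:

    # settings.py -- connection parameters read by MysqlTwistedPipeline.from_settings()
    MYSQL_HOST = "127.0.0.1"
    MYSQL_DBNAME = "art_schema"
    MYSQL_USER = "root"
    MYSQL_PASSWORD = "ts123456"
    MYSQL_PORT = 3306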

3. Turning MysqlTwistedPipeline into a shared pipeline (so that multiple pipelines don't each open the database and cause errors)

Changes in items.py: add a get_insert_sql() method to the item.

    import scrapy
    from scrapy.loader.processors import MapCompose, TakeFirst, Join

    # get_nums, return_value and remove_comment_tags are helper functions
    # defined elsewhere in the original project.


    class JobBoleArticleItem(scrapy.Item):
        title = scrapy.Field(
            input_processor=MapCompose(lambda x: x + "-jobbole")
            # output_processor=TakeFirst()
        )
        create_date = scrapy.Field()
        url = scrapy.Field()
        url_object_id = scrapy.Field()
        front_image_url = scrapy.Field(
            output_processor=MapCompose(return_value)
        )
        front_image_path = scrapy.Field()
        praise_nums = scrapy.Field(
            input_processor=MapCompose(get_nums)
        )
        fav_nums = scrapy.Field(
            input_processor=MapCompose(get_nums)
        )
        comment_nums = scrapy.Field(
            input_processor=MapCompose(get_nums)
        )
        tags = scrapy.Field(
            input_processor=MapCompose(remove_comment_tags),
            output_processor=Join(",")
        )
        content = scrapy.Field()

        def get_insert_sql(self):
            # Each item type builds its own SQL statement and parameter tuple
            insert_sql = """
                insert into jobbole_article(title, url, create_date, fav_nums, url_object_id,
                    front_image_url, front_image_path, praise_nums, comment_nums, tags, content)
                values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            params = (self['title'], self['url'], self['create_date'], self['fav_nums'],
                      self['url_object_id'], self['front_image_url'], self['front_image_path'],
                      self['praise_nums'], self['comment_nums'], self['tags'], self['content'])
            return insert_sql, params

Changes in the pipeline:

    def do_insert(self, cursor, item):
        # Perform the actual insert:
        # each item builds its own SQL statement, so the pipeline stays table-agnostic
        insert_sql, params = item.get_insert_sql()
        cursor.execute(insert_sql, params)
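With this change MysqlTwistedPipeline no longer knows anything about a particular table, so one shared pipeline can serve every item type and only one component opens the database. A hypothetical second item, purely illustrative (its name, table and fields are assumptions, not from the original post), only has to provide its own get_insert_sql():

    class QuestionItem(scrapy.Item):
        # Hypothetical item for a second table; fields and table name are assumptions
        question_id = scrapy.Field()
        title = scrapy.Field()
        content = scrapy.Field()

        def get_insert_sql(self):
            insert_sql = """
                insert into question(question_id, title, content)
                values (%s, %s, %s)
            """
            params = (self['question_id'], self['title'], self['content'])
            return insert_sql, params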

Note in particular: port=3306 must be an int, not a str.
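The same point shown in settings.py (a minimal sketch):

    # settings.py
    # MYSQL_PORT = "3306"   # wrong: per the note above, a string port breaks the connection
    MYSQL_PORT = 3306        # correct: the port must be an int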

Reposted from: https://blog.csdn.net/qq_15695761/article/details/79931120
