Using MySQL with Scrapy

Kept here as a reference for future code:

1. Synchronous execution

import pymysql


class CollectDataPipeline(object):
    def open_spider(self, spider):
        print('open' * 20)
        self.conn = pymysql.connect(host="127.0.0.1", user="root", password="****", database="collect_data",
                                    charset="utf8")
        self.db_cur = self.conn.cursor()

    def close_spider(self, spider):
        self.conn.close()

    def process_item(self, item, spider):
        val = (item['title'],
               item['contents'],
               item['time'])
        # Delete the old rows first
        sql = '''delete from data where length(time)>0;'''
        self.db_cur.execute(sql)
        self.conn.commit()
        # Insert the new row
        sql = '''INSERT INTO data(title,contents,time) VALUES(%s,%s,%s);'''
        self.db_cur.execute(sql, val)
        self.conn.commit()
        return item
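
To make Scrapy actually run this pipeline, it has to be registered in ITEM_PIPELINES in settings.py. A minimal sketch, assuming the project module is named collect_data and the class lives in collect_data/pipelines.py (both names are assumptions about the project layout):

# settings.py -- enable the synchronous pipeline (dotted path assumes a project named collect_data)
ITEM_PIPELINES = {
    'collect_data.pipelines.CollectDataPipeline': 300,
}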

2. Asynchronous execution

import pymysql
from twisted.enterprise import adbapi


# Asynchronous insert pipeline
class LvyouPipeline(object):
    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):  # fixed method name, called by Scrapy; gives direct access to settings values
        """
        Set up the database connection pool.
        :param settings: Scrapy settings
        :return: pipeline instance
        """
        adbparams = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            password=settings['MYSQL_PASSWORD'],
            charset='utf8',
            cursorclass=pymysql.cursors.DictCursor   # specify the cursor class
        )

        # Create the ConnectionPool, using pymysql (or MySQLdb) as the driver
        dbpool = adbapi.ConnectionPool('pymysql', **adbparams)
        # Return the pipeline instance
        return cls(dbpool)

    def process_item(self, item, spider):
        """
        Use Twisted to make the MySQL insert asynchronous. The connection pool
        runs the actual SQL operation and returns a Deferred.
        """
        query = self.dbpool.runInteraction(self.do_insert, item)  # which method to run, and with which data
        # Attach error handling
        query.addErrback(self.handle_error)  # handle failures (note: addErrback, not addCallback)
        return item

    # The rest of the code stays the same; only the SQL statement changes, so this is reusable.
    def do_insert(self, cursor, item):
        val = (item['Name'], item['Address'], item['Grade'], item['Score'], item['Price'])
        # Run the insert; no explicit commit is needed, Twisted commits the transaction automatically
        insert_sql = """
        insert into daname(name1, address, grade, score, price) VALUES (%s,%s,%s,%s,%s)
        """
        cursor.execute(insert_sql, val)

    def handle_error(self, failure):
        if failure:
            # Log the failure
            print(failure)
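
from_settings above pulls the connection details out of settings.py, and the pipeline still has to be registered in ITEM_PIPELINES. A minimal sketch of those settings, assuming the project module is named lvyou (the module name and all values are placeholders):

# settings.py -- values read by LvyouPipeline.from_settings (all values are placeholders)
MYSQL_HOST = '127.0.0.1'
MYSQL_DBNAME = 'lvyou'
MYSQL_USER = 'root'
MYSQL_PASSWORD = '****'

ITEM_PIPELINES = {
    'lvyou.pipelines.LvyouPipeline': 300,   # assumed dotted path to the pipeline class
}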

 

3. Configuration via from_crawler

The following code can also be used to connect Scrapy to MySQL:

1. First, add the following to the Scrapy project's settings.py:

ITEM_PIPELINES = {
    'myproject.pipelines.MySQLPipeline': 300,
}

MYSQL_HOST = 'localhost'
MYSQL_DBNAME = 'mydatabase'
MYSQL_USER = 'myusername'
MYSQL_PASSWORD = 'mypassword'

2. Then, add the following to the project's pipelines.py:

import pymysql


class MySQLPipeline(object):
    def __init__(self, host, dbname, user, password):
        self.host = host
        self.dbname = dbname
        self.user = user
        self.password = password

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            host=crawler.settings.get('MYSQL_HOST'),
            dbname=crawler.settings.get('MYSQL_DBNAME'),
            user=crawler.settings.get('MYSQL_USER'),
            password=crawler.settings.get('MYSQL_PASSWORD')
        )

    def open_spider(self, spider):
        self.conn = pymysql.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            db=self.dbname,
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor
        )

    def close_spider(self, spider):
        self.conn.close()

    def process_item(self, item, spider):
        with self.conn.cursor() as cursor:
            sql = "INSERT INTO mytable (column1, column2, column3) VALUES (%s, %s, %s)"
            cursor.execute(sql, (item['column1'], item['column2'], item['column3']))
        self.conn.commit()
        return item

3. Finally, define your item in the project's items.py:

import scrapy


class MyItem(scrapy.Item):
    column1 = scrapy.Field()
    column2 = scrapy.Field()
    column3 = scrapy.Field()

When the spider runs, the scraped data will be stored in the MySQL database.
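
For completeness, a minimal spider sketch showing how items reach the pipeline above. The start URL and CSS selectors are hypothetical placeholders, not taken from the original article:

# myproject/spiders/my_spider.py -- hypothetical spider feeding MyItem into MySQLPipeline
import scrapy

from myproject.items import MyItem   # 'myproject' matches the assumed project name used above


class MySpider(scrapy.Spider):
    name = 'my_spider'
    start_urls = ['https://example.com/list']   # placeholder URL

    def parse(self, response):
        for row in response.css('div.row'):      # hypothetical selectors
            item = MyItem()
            item['column1'] = row.css('h2::text').get()
            item['column2'] = row.css('p.address::text').get()
            item['column3'] = row.css('span.score::text').get()
            yield item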