scrapy异步写入mysql_Scrapy异步存入Mysql小技巧

本文介绍了如何在Scrapy中实现异步写入MySQL数据库,通过创建ORM模型,定义SQL拼接函数和自定义管道,确保字段对应,并使用twisted库实现异步操作,提高数据存储效率。
摘要由CSDN通过智能技术生成

使用这个小技巧需要确保数据库字段名跟Scrapy的item字段名一样

##models.py

#!/usr/bin/env python

# -*- coding: utf-8 -*-

# @Time : 15:54

# @Author : kbsonlong

# @Blog : https://www.alongparty.cn

# @File : models.py

# @Software: PyCharm

# @Email : kbsonlong@gmail.com

from sqlalchemy import create_engine

from sqlalchemy import Column, Integer, String, Date, DateTime, Text,Float,BigInteger

from sqlalchemy.orm import sessionmaker

from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

####链家租房

class Houses(Base):

__tablename__ = "houses"

house_num = Column(BigInteger, primary_key=True)

house_area= Column(Float, nullable=False, default=0)

house_type= Column(String(512), nullable=False, default='')

house_floor= Column(String(512), nullable=False, default='')

house_orientation= Column(String(512), nullable=False, default='')

traffic = Column(String(512), nullable=False, default='')

house_name= Column(String(512), nullable=False, default='')

house_address = Column(String(512), nullable=False, default='')

house_price = Column(Integer, nullable=False, default=0)

house_describe = Column(String(512), nullable=False, default='')

house_url = Column(String(512), nullable=False, default='')

contact_id = Column(BigInteger, nullable=False, default=0)

def create_session():

# declare the connecting to the server

engine = create_engine("mysql+pymysql://root:root@127.0.0.1:3306/testdb?charset=utf8",echo=False)

# connect session to active the action

Session = sessionmaker(bind=engine)

session = Session()

Base.metadata.create_all(engine)

return session

##items.py

#!/usr/bin/env python

# -*- coding: utf-8 -*-

# @Time : 15:54

# @Author : kbsonlong

# @Blog : https://www.alongparty.cn

# @File : items.py

# @Software: PyCharm

# @Email : kbsonlong@gmail.com

import six

from scrapy import Field,Item

###SQL拼接函数

def insert_sql(item,table_name):

table_name = table_name

col_str = ''

row_str = ''

for key in item.keys():

col_str = col_str + "" + key + ","

row_str = "{}'{}',".format(row_str, item[key] if "'" not in item[key] else item[key].replace("'", "\\'"))

sql = "insert INTO {} ({}) VALUES ({}) ON DUPLICATE KEY UPDATE".format(table_name, col_str[1:-1],

row_str[:-1])

for (key, value) in six.iteritems(item):

sql += "{} = '{}',".format(key, value if "'" not in value else value.replace("'", "\\'"))

sql = sql[:-2]

return sql

####链家租房

class HouseItem(Item):

house_num = Field()

house_area= Field()

house_type= Field()

house_floor= Field()

house_orientation= Field()

traffic = Field()

house_name= Field()

house_address = Field()

house_price = Field()

house_describe = Field()

house_url = Field()

contact_id = Field()

def get_insert_sql(self,table_name):

isql = insert_sql(self,table_name)

return isql

##pipelines.py

#!/usr/bin/env python

# -*- coding: utf-8 -*-

# @Time : 15:54

# @Author : kbsonlong

# @Blog : https://www.alongparty.cn

# @File : pipelines.py

# @Software: PyCharm

# @Email : kbsonlong@gmail.com

import models

import pymysql

import pymysql.cursors

from twisted.enterprise import adbapi

from settings import MYSQL_CONN

class MysqlTwistedPipeline(object):

def __init__(self, dbpool):

models.create_session() ##调用orm进行数据库初始化

self.dbpool = dbpool

@classmethod

def from_settings(cls, settings):

dbpool = adbapi.ConnectionPool("pymysql", host=MYSQL_CONN["host"], db=MYSQL_CONN["db"],

user=MYSQL_CONN["user"], password=MYSQL_CONN["password"], charset="utf8",

cursorclass=pymysql.cursors.DictCursor,

use_unicode=True)

return cls(dbpool)

def process_item(self, item, spider):

# 使用twisted将mysql插入变成异步执行

self.dbpool.runInteraction(self.do_insert,item)

def do_insert(self, cursor, item):

# 执行具体的插入

# 根据不同的item 构建不同的sql语句并插入到mysql中

insert_sql = item.get_insert_sql(models.Houses.__tablename__)

cursor.execute(insert_sql)

参考资料:

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值