使用这个小技巧需要确保数据库字段名跟Scrapy的item字段名一样
##models.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 15:54
# @Author : kbsonlong
# @Blog : https://www.alongparty.cn
# @File : models.py
# @Software: PyCharm
# @Email : kbsonlong@gmail.com
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, Date, DateTime, Text,Float,BigInteger
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
####链家租房
class Houses(Base):
__tablename__ = "houses"
house_num = Column(BigInteger, primary_key=True)
house_area= Column(Float, nullable=False, default=0)
house_type= Column(String(512), nullable=False, default='')
house_floor= Column(String(512), nullable=False, default='')
house_orientation= Column(String(512), nullable=False, default='')
traffic = Column(String(512), nullable=False, default='')
house_name= Column(String(512), nullable=False, default='')
house_address = Column(String(512), nullable=False, default='')
house_price = Column(Integer, nullable=False, default=0)
house_describe = Column(String(512), nullable=False, default='')
house_url = Column(String(512), nullable=False, default='')
contact_id = Column(BigInteger, nullable=False, default=0)
def create_session():
# declare the connecting to the server
engine = create_engine("mysql+pymysql://root:root@127.0.0.1:3306/testdb?charset=utf8",echo=False)
# connect session to active the action
Session = sessionmaker(bind=engine)
session = Session()
Base.metadata.create_all(engine)
return session
##items.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 15:54
# @Author : kbsonlong
# @Blog : https://www.alongparty.cn
# @File : items.py
# @Software: PyCharm
# @Email : kbsonlong@gmail.com
import six
from scrapy import Field,Item
###SQL拼接函数
def insert_sql(item,table_name):
table_name = table_name
col_str = ''
row_str = ''
for key in item.keys():
col_str = col_str + "" + key + ","
row_str = "{}'{}',".format(row_str, item[key] if "'" not in item[key] else item[key].replace("'", "\\'"))
sql = "insert INTO {} ({}) VALUES ({}) ON DUPLICATE KEY UPDATE".format(table_name, col_str[1:-1],
row_str[:-1])
for (key, value) in six.iteritems(item):
sql += "{} = '{}',".format(key, value if "'" not in value else value.replace("'", "\\'"))
sql = sql[:-2]
return sql
####链家租房
class HouseItem(Item):
house_num = Field()
house_area= Field()
house_type= Field()
house_floor= Field()
house_orientation= Field()
traffic = Field()
house_name= Field()
house_address = Field()
house_price = Field()
house_describe = Field()
house_url = Field()
contact_id = Field()
def get_insert_sql(self,table_name):
isql = insert_sql(self,table_name)
return isql
##pipelines.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 15:54
# @Author : kbsonlong
# @Blog : https://www.alongparty.cn
# @File : pipelines.py
# @Software: PyCharm
# @Email : kbsonlong@gmail.com
import models
import pymysql
import pymysql.cursors
from twisted.enterprise import adbapi
from settings import MYSQL_CONN
class MysqlTwistedPipeline(object):
def __init__(self, dbpool):
models.create_session() ##调用orm进行数据库初始化
self.dbpool = dbpool
@classmethod
def from_settings(cls, settings):
dbpool = adbapi.ConnectionPool("pymysql", host=MYSQL_CONN["host"], db=MYSQL_CONN["db"],
user=MYSQL_CONN["user"], password=MYSQL_CONN["password"], charset="utf8",
cursorclass=pymysql.cursors.DictCursor,
use_unicode=True)
return cls(dbpool)
def process_item(self, item, spider):
# 使用twisted将mysql插入变成异步执行
self.dbpool.runInteraction(self.do_insert,item)
def do_insert(self, cursor, item):
# 执行具体的插入
# 根据不同的item 构建不同的sql语句并插入到mysql中
insert_sql = item.get_insert_sql(models.Houses.__tablename__)
cursor.execute(insert_sql)
参考资料: