下面的代码结合 pandas 和 SQLAlchemy,将数据写入 MySQL 数据库。
# -*- coding: utf-8 -*-
# 管道的作用主要是做数据清洗
from eie.middlewares import udf_config
from sqlalchemy.engine import create_engine
import pandas as pd
from eie import settings
import threading
from scrapy.exceptions import DropItem
# Module-wide logger, taken from the project's middleware configuration.
logger = udf_config.logger

# Connection URL for the MySQLdb driver; credentials, host and database name
# all come from the project settings, the port is fixed at 3306.
_mysql_url = 'mysql+mysqldb://{}:{}@{}:3306/{}'.format(
    settings.MYSQL_USER,
    settings.MYSQL_PASSWD,
    settings.MYSQL_HOST,
    settings.MYSQL_DBNAME,
)

# Single SQLAlchemy engine shared by the code in this module.
engine = create_engine(
    _mysql_url,
    connect_args={'charset': 'utf8'},
    pool_size=settings.MYSQL_POOL_SIZE,
)

# Module-level lock; nothing visible in this file acquires it.
mutex = threading.Lock()
class EiePipeline(object):
    """Item pipeline that appends every item to the ``eie_ip`` MySQL table."""

    def process_item(self, item, spider):
        """Write one item to the database and pass it on.

        The item is wrapped in a single-row DataFrame and appended to the
        ``eie_ip`` table through the module-level ``engine``.

        Args:
            item: The scraped item; used as the only row of the DataFrame.
            spider: The spider that produced the item (unused here).

        Returns:
            The unchanged ``item``.

        Raises:
            DropItem: If building the insert or writing to the database
                fails for any reason; the message carries the item and the
                underlying error.
        """
        df = pd.DataFrame([item])
        logger.debug(df)
        try:
            df.to_sql('eie_ip', engine, if_exists='append', index=False)
        # ``as`` is valid on Python 2.6+ and Python 3; the old comma form is
        # a SyntaxError on Python 3.
        except Exception as e:
            # Any failure is converted to DropItem so the item is discarded
            # instead of the exception propagating as-is.
            raise DropItem('insert to mysql error! %s, %s' % (item, e))
        return item

    def close_spider(self, spider):
        """Hook called with the closing spider; intentionally does nothing."""
        pass