# pipelines.py
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
import logging
import sqlite3
from copy import deepcopy

from itemadapter import ItemAdapter
from twisted.enterprise import adbapi
class MapbarspiderPipeline:
    """Scrapy item pipeline that bulk-inserts scraped POI rows into SQLite.

    The spider accumulates rows in ``spider.resp_items_list``; this pipeline
    flushes that buffer in batches of :attr:`BULK_SIZE` through a Twisted
    ``adbapi.ConnectionPool`` so inserts happen off the reactor thread.
    """

    # Number of buffered rows that triggers a batched insert.
    BULK_SIZE = 1000

    def __init__(self, sqlite_file, sqlite_table):
        """
        :param sqlite_file: path to the SQLite database file.
        :param sqlite_table: name of the table rows are inserted into.
        """
        self.sqlite_file = sqlite_file
        self.sqlite_table = sqlite_table
        # check_same_thread=False: the pool executes interactions on
        # Twisted's worker threads, not on the thread that created it.
        self.dbpool = adbapi.ConnectionPool(
            'sqlite3',
            database=self.sqlite_file,
            check_same_thread=False,
        )
        # Short-lived connection used only by open_spider for the DDL.
        self.conn = None

    @classmethod
    def from_crawler(cls, crawler):
        """Scrapy entry point: build the pipeline from project settings."""
        return cls(
            sqlite_file=crawler.settings.get('SQLITE_DB_NAME'),
            sqlite_table=crawler.settings.get('SQLITE_TABLE', 'items'),
        )

    def open_spider(self, spider):
        """Create the destination table if it does not exist yet."""
        create_tb_cmd = '''
            CREATE TABLE IF NOT EXISTS mapbar
            (city_code varchar(255),
            city_name varchar(255),
            root_sort varchar(255),
            item_sort varchar(255),
            item_href varchar(255),
            uid varchar(255),
            area_name varchar(255),
            address_name varchar(255),
            address_addr varchar(255),
            address_lon double,
            address_lat double
            );
            '''
        self.conn = sqlite3.connect(self.sqlite_file)
        try:
            self.conn.execute(create_tb_cmd)
        except sqlite3.Error:
            # Was a bare except/print: narrow the catch and keep the traceback.
            logging.exception("Create table failed")
        finally:
            # Was only closed on the success path, leaking the handle on error.
            self.conn.close()

    def close_spider(self, spider):
        """Flush any remaining buffered rows, then shut the pool down.

        Returning the deferred makes Scrapy wait for the final batch to be
        written before the process exits (otherwise it could be lost).
        """
        if spider.resp_items_list:
            bulkdata = deepcopy(spider.resp_items_list)
            d = self.dbpool.runInteraction(
                self.bulk_insert_to_sqlite, spider.itemkeys, bulkdata)
            # Clear the buffer in place; the spider holds the same list object.
            del spider.resp_items_list[:]
            d.addBoth(lambda _: self.dbpool.close())
            return d
        self.dbpool.close()

    def process_item(self, item, spider):
        """Trigger a batched write once the spider's buffer is full.

        The spider itself appends rows to ``spider.resp_items_list``; this
        method only schedules the flush and passes the item through.
        """
        # ">=" instead of "==": a buffer that ever overshoots the threshold
        # would otherwise never be flushed until close_spider.
        if len(spider.resp_items_list) >= self.BULK_SIZE:
            bulkdata = deepcopy(spider.resp_items_list)
            self.dbpool.runInteraction(
                self.bulk_insert_to_sqlite, spider.itemkeys, bulkdata)
            del spider.resp_items_list[:]
        return item

    def bulk_insert_to_sqlite(self, tx, itemkeys, bulkdata):
        """Insert a batch of rows inside an adbapi interaction.

        :param tx: cursor-like transaction object supplied by
            ``ConnectionPool.runInteraction``.
        :param itemkeys: ordered column names (from the spider/settings —
            trusted configuration, not user input).
        :param bulkdata: sequence of value tuples matching ``itemkeys``.
        """
        try:
            # Table/column names cannot be bound as parameters; values are
            # bound with '?' placeholders, so no SQL injection via item data.
            insert_sql = "insert into {0}({1}) values ({2})".format(
                self.sqlite_table,
                ', '.join(itemkeys),
                ', '.join(['?'] * len(itemkeys)))
            tx.executemany(insert_sql, bulkdata)
        except sqlite3.Error as why:
            logging.error(why.args[0])
# Scrapy Baidu POI crawler — practical project code (part 3)
# Latest recommended article published 2021-01-20 20:21:01