# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# NOTE: a single scrapy crawl must store all its data into one and the same
# database; writing to multiple databases from one crawl is not supported. (Que Hui)
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import os
import QH_Sctock_001_Scrapy.qh_py_file.qh_sqlite_ORM as qh_db_orm #引入excel配置
import QH_Sctock_001_Scrapy.qh_py_file.qh_get_excel_settings as qh_ges #引入excel配置
import QH_Sctock_001_Scrapy.qh_py_file.qh_spider_tool as qh_tool #引入工具
class QhSctock001ScrapyPipeline:
    """Scrapy item pipeline that buffers item payloads per spider id and
    bulk-stores each batch into a SQLite database via the project's ORM.

    A single scrapy run must write into one and the same database (see the
    module-level note): the SQLite connection is opened once, on the first
    item, and reused even when the spider id changes.
    """

    def open_spider(self, spider):
        """Initialise the item buffer and the spider-id sentinels."""
        # Buffer of item payloads awaiting the next bulk write.
        self.qh_oo = []
        # Spider id of the item currently being processed ("" until the
        # first item arrives).
        self.qh_spider_id = ""
        # Spider id the buffered batch in self.qh_oo belongs to.
        self.qh_spider_id_bool = ""

    def _load_db_config(self):
        """Load the DB settings for ``self.qh_spider_id_bool`` from the
        Excel configuration and cache them as instance attributes."""
        qh_path = os.path.abspath(os.path.join(os.getcwd(), "."))  # run directory
        qh_db_lujing = qh_ges.QH_Get_DB_CanShu(qh_path, self.qh_spider_id_bool)
        self.qh_db_path = qh_db_lujing["QH_DB_Path"]              # 1. database path
        self.qh_db_name = qh_db_lujing["QH_DB_Name"]              # 2. database name
        self.qh_table_name = qh_db_lujing["QH_Table_Name"]        # 3. table name
        self.qh_filed_set = qh_db_lujing["qh_Filed_Set"]          # 4. column definition list
        self.qh_filed_value = qh_db_lujing["qh_Filed_Value"]      # 5. column names
        self.qh_id_type = qh_db_lujing["QH_Id_Type"]              # 6. row-id build strategy (new)
        self.qh_id_type_filed = qh_db_lujing["QH_Id_Type_Filed"]  # 7. row-id source fields (new)

    def _flush_to_db(self):
        """Bulk-write the buffered batch for the current spider id and commit."""
        self.qh_cunshu_db.QH_DT_ChuanCan(Qh_Table_Name=self.qh_table_name,
                                         QH_Table_Set=self.qh_filed_set,
                                         qh_filed_key=self.qh_filed_value,
                                         qh_spider_id=str(self.qh_spider_id_bool),
                                         Qh_ShuJu_Data=self.qh_oo,
                                         Qh_id_lie_c=self.qh_id_type_filed,
                                         Qh_ID_Type=self.qh_id_type)
        self.qh_cunshu_db.QH_CunChu_Data_Main()
        # NOTE(review): "QH_Cimmit" is the ORM's method name as-is — looks
        # like a typo for "commit"; renaming it belongs in the ORM module.
        self.qh_cunshu_db.QH_Cimmit()

    def process_item(self, item, spider):
        """Buffer the item's payload; on a spider-id change, first flush the
        previous spider's batch to the database."""
        # Very first item: remember the spider id, load its DB settings and
        # open the SQLite database (created first if it does not exist).
        if self.qh_spider_id == "" and self.qh_spider_id_bool == "":
            self.qh_spider_id = item["qh_spider_id"]
            self.qh_spider_id_bool = self.qh_spider_id  # first assignment
            self._load_db_config()
            self.qh_cunshu_db = qh_db_orm.QH_SQLite_DB_CunChu(self.qh_db_path)
        # Normal path (also reached right after the initialisation above).
        if self.qh_spider_id != "" and self.qh_spider_id_bool != "":
            self.qh_spider_id = item["qh_spider_id"]
            if self.qh_spider_id_bool == self.qh_spider_id:
                # Same spider: keep buffering.
                self.qh_oo.append(item["qh_new_fied"])
            else:
                # Spider id changed: persist the previous spider's batch,
                # then re-initialise the buffer for the new spider.
                self._flush_to_db()
                self.qh_oo = []
                self.qh_spider_id_bool = self.qh_spider_id  # second assignment
                self.qh_oo.append(item["qh_new_fied"])
                # Reload DB settings for the new spider id. NOTE(review):
                # the original code reuses the existing connection here and
                # only refreshes table settings — consistent with the
                # one-database-per-crawl rule above.
                self._load_db_config()
        return item

    def close_spider(self, spider):
        """Flush the final buffered batch, commit, and close the database."""
        self._flush_to_db()
        self.qh_cunshu_db.QH_DB_Close()
# NOTE(review): the following trailing lines appear to be stray pasted data,
# not Python (e.g. "03-06" is a SyntaxError in Python 3); commented out to
# keep the module importable. Preserved verbatim below — delete if confirmed junk.
# 12-11
# 852
# 03-06
# 4007
# 09-22
# 1568
# 07-07
# 1538