# encoding:utf-8
import sys
import os
import datetime
import logging
import time
import csv
import json
# Locate the application's top-level directory: the parent of the current
# working directory. The project packages imported below (QH_网络配置, ...)
# are expected to live directly under it.
# NOTE(review): this depends on the working directory the script is launched
# from, not on the location of this file.
qh_o_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Add that directory to the module search path so the project imports resolve.
sys.path.append(qh_o_path)
import QH_网络配置.QH_爬虫网络配置 as qh_wl_set
import QH_Sqlite数据库.qh_sqlite_ORM as qh_db_orm
import QH_Sqlite数据库.QH_Get_DB_Set as qh_db_set
import QH_爬虫对象.QH_新浪股票对象01 as qh_PaChong
import QH_爬虫对象.QH_搜狐首页对象 as qh_souhu
import QH_Tool.QH_TOOL as qh_tool
import QH_Log_日志.QH_Log as qh_log
# ======================= Parameters (author: Que Hui) =======================
# Spider ID: taken from the first command-line argument when one is given,
# otherwise the built-in default for this script is used.
qh_chuan_can = sys.argv
try:
    qh_spider_id = qh_chuan_can[1]
except IndexError:
    # No argument on the command line. Only IndexError is caught so that
    # unrelated errors (e.g. KeyboardInterrupt) are not silently swallowed.
    qh_spider_id = "QH_Spider_00059"

# Storage-scheme IDs; each one is later used to look up a set of database
# settings for one of the storage targets.
qh_cunchu_id00 = "QH_Spider_00059"
qh_cunchu_id01 = "QH_Spider_00059_1"
qh_cunchu_id02 = "QH_Spider_00059_2"

# Set up the log writer for this spider.
qh_log_write = qh_log.Qh_log_write(qh_o_path, qh_spider_id)
qh_log_write.info("QH_该应用的顶级目录//:{}".format(qh_o_path))

# Date stamped onto every crawled row (per the original note: the current
# trading day as obtained from Sohu).
qh_now_date = qh_souhu.QH_Get_Dangri_Date()

# Stock boards to crawl: display name -> value sent as the "node" URL parameter.
# The part of the display name before "_" is used later as the exchange name.
qh_BanKuai = {
    "上交所_A股": "sh_a",
    "上交所_B股": "sh_b",
    "深交所_A股": "sz_a",
    "深交所_B股": "sz_b",
    "深交所_创业板": "cyb",
    "上交所_科创版": "kcb",
    "北交所": "hs_bjs",
}
# ================== Crawl data from the network (author: Que Hui) ==================
# Load the network settings configured for this spider.
qh_log_write.info('获取网络参数!')
qh_wangluo = qh_wl_set.QH_Get_WangLuo_CanShu(qh_o_path, qh_spider_id)
qh_url = qh_wangluo["QH_URL"]  # request URL
qh_header = qh_wangluo["QH_Header"]  # request headers
qh_Params = qh_wangluo["QH_Params"]  # URL parameters for GET requests
qh_Payload = qh_wangluo["QH_Payload"]  # form parameters for POST requests
# Field description used when parsing the response.
# NOTE(review): the original comment says "Chinese description" while the key
# is "qh_Filed_EN" - confirm which one it is.
qh_filed_name = qh_wangluo["qh_Filed_EN"]
# Legacy ID-construction settings; the storage function reads its own values
# from the database settings instead.
qh_id_type = qh_wangluo["QH_Id_Type"]
qh_id_type_filed = qh_wangluo["QH_Id_Type_Filed"]

# Create the spider object.
qh_PaChong_ject = qh_PaChong.QH_XinLang_Spider(qh_url, qh_header, qh_Params, qh_filed_name)

# Crawl the stock list of every board and merge the results.
qh_JieGuo_List = []  # full rows, all boards
qh_JieGuo_List_stock = []  # reduced rows destined for the "data center" table
for qh_bk_name, qh_bk in qh_BanKuai.items():
    # Switch the request to this board. qh_Params is the same dict object that
    # was handed to the spider's constructor, so it is updated in place.
    qh_Params["node"] = qh_bk
    # NOTE(review): this sets an attribute on the *module* qh_PaChong, not on
    # the spider instance qh_PaChong_ject. Kept unchanged because the spider's
    # internals are not visible here - confirm whether the instance was meant.
    qh_PaChong.qh_params = qh_Params
    qh_JieGuo_List01 = qh_PaChong_ject.QH_XinLang_Main()  # issue the request

    # Exchange name is the board name up to the first "_" (whole name if none).
    qh_exchange = qh_bk_name.split("_")[0]
    for qh_row in qh_JieGuo_List01:
        # Extend the crawled row in place with three extra columns.
        qh_row.append(qh_bk_name)  # board (including exchange)
        qh_row.append(qh_exchange)  # exchange
        qh_row.append(qh_now_date)  # trading date
        # Pick the columns stored in the data-center stock list. The indexes
        # assume the row layout produced by the spider plus the columns above;
        # column meanings are taken from the original comments.
        qh_JieGuo_List_stock.append([
            qh_row[21],  # exchange
            qh_row[20],  # board
            qh_row[3],   # stock code
            qh_row[15],  # stock code 0
            qh_row[6],   # total market value
            qh_row[8],   # circulating market value
            qh_row[11],  # price/earnings ratio
            qh_row[13],  # latest price
        ])
    # Merge this board's rows into the overall result.
    qh_JieGuo_List.extend(qh_JieGuo_List01)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%数据存储,阙辉%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
def QH_CunChu_Data(qh_o_path, Qh_ShuJu_Data, qh_cunchu_id, qh_spider_id):
    """
    Store rows in the database described by a storage scheme.

    Author: Que Hui.

    The database settings (path, table, field definitions, ID construction
    rule) are looked up with ``qh_cunchu_id`` and handed, together with the
    data, to ``qh_db_orm.QH_SQLite_DB_CunChu``. Progress and the outcome are
    written to the module-level ``qh_log_write``.

    :param qh_o_path: application root directory, passed to the settings lookup
    :param Qh_ShuJu_Data: data to store (a list of rows)
    :param qh_cunchu_id: storage-scheme ID used to look up the database settings
    :param qh_spider_id: spider ID; only compared with ``qh_cunchu_id`` for logging
    :return: the value returned by ``QH_CunChu_Data_Main()``; a truthy value
        is treated as success. Existing callers may ignore it.
    """
    # Record whether the storage scheme is the spider's own ID or a variant.
    if qh_cunchu_id == qh_spider_id:
        qh_log_write.info('存储方案和爬虫id一致_{}!'.format(qh_cunchu_id))
    else:
        qh_log_write.info('存储方案和爬虫id不一致_{}!'.format(qh_cunchu_id))

    # Load the database settings for this storage scheme.
    qh_log_write.info('获取数据库参数!')
    qh_db_lujing = qh_db_set.QH_Get_DB_CanShu(qh_o_path, qh_cunchu_id)
    qh_db_path = qh_db_lujing["QH_DB_Path"]  # database path
    qh_table_name = qh_db_lujing["QH_Table_Name"]  # table name
    qh_filed_set = qh_db_lujing["qh_Filed_Set"]  # table field definitions
    qh_filed_value = qh_db_lujing["qh_Filed_Value"]  # table field names
    qh_id_type = qh_db_lujing["QH_Id_Type"]  # how the row ID is built (type)
    qh_id_type_filed = qh_db_lujing["QH_Id_Type_Filed"]  # how the row ID is built (value)

    # Build the storage object and run it.
    qh_cunshu_db = qh_db_orm.QH_SQLite_DB_CunChu(
        qh_db_path,
        qh_table_name,
        qh_filed_set,
        qh_filed_value,
        qh_cunchu_id,
        Qh_ShuJu_Data,
        Qh_id_lie_c=qh_id_type_filed,
        Qh_ID_Type=qh_id_type,
    )
    qh_db_star = qh_cunshu_db.QH_CunChu_Data_Main()
    if qh_db_star:
        qh_log_write.info('对象数据库存储成功!')
    else:
        qh_log_write.info('对象数据库存储失败!')
    # Hand the status back so callers can react to a failed store.
    return qh_db_star
# Persist the crawl results, in order (descriptions follow the original notes):
#   1. Sina data        - one row per stock
#   2. Sina data        - one row per stock per trading day
#   3. data-center list - one row per stock (reduced columns)
for qh_data, qh_scheme_id in (
    (qh_JieGuo_List, qh_cunchu_id00),
    (qh_JieGuo_List, qh_cunchu_id01),
    (qh_JieGuo_List_stock, qh_cunchu_id02),
):
    QH_CunChu_Data(qh_o_path, qh_data, qh_scheme_id, qh_spider_id)
# Overall flow: request data >>> parse data >>> store in the database