Fetching All A-Share Stock Codes from Eastmoney (东方财富)

This article walks through a crawler implementation targeting the Eastmoney site (东方财富网), covering parameter setup, network data scraping, and the storage step. The Python script fetches stock data from the site and saves it into a SQLite database.


# encoding:utf-8
import sys
import os
import datetime
import logging
import time
import csv
import json
# Resolve the application's top-level root directory
qh_o_path = os.path.abspath(os.path.join(os.getcwd(), "..")) # parent of the current working directory, i.e. the folder above QH_网络配置
sys.path.append(qh_o_path)        # add this project root to the module search path
import QH_网络配置.QH_爬虫网络配置 as qh_wl_set
import QH_Sqlite数据库.qh_sqlite_ORM as qh_db_orm
import QH_Sqlite数据库.QH_Get_DB_Set as qh_db_set
import QH_爬虫对象.QH_东方财富网 as qh_PaChong
import QH_爬虫对象.QH_搜狐首页对象 as qh_souhu
import QH_Tool.QH_TOOL as qh_tool
import QH_Log_日志.QH_Log as qh_log

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Parameters, 阙辉 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Spider ID: taken from the command line if supplied, otherwise a default
qh_chuan_can = sys.argv
try:
    qh_spider_id = qh_chuan_can[1]
except IndexError:
    qh_spider_id = "QH_Spider_00058"
qh_cunchu_id00 = "QH_Spider_00058"
qh_cunchu_id01 = "QH_Spider_00058_1"
qh_cunchu_id02 = "QH_Spider_00058_2"
# Set up logging
qh_log_write = qh_log.Qh_log_write(qh_o_path, qh_spider_id)
qh_log_write.info("QH_Top-level directory of this application: {}".format(qh_o_path))
# Get the current trading day from the Sohu helper
qh_now_date = qh_souhu.QH_Get_Dangri_Date()
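`QH_Get_Dangri_Date` comes from the project-local `QH_搜狐首页对象` module (it looks up the current trading day via Sohu) and is not listed in this post. To run the snippet without that module, a rough stand-in, assuming the helper returns a date string, might look like this:

```python
import datetime

def get_dangri_date_stub():
    # Hypothetical stand-in for qh_souhu.QH_Get_Dangri_Date; the "YYYY-MM-DD"
    # format is an assumption for illustration, not the module's documented output.
    # The real helper returns the actual trading day, which can differ from
    # today's date on weekends and holidays.
    return datetime.date.today().strftime("%Y-%m-%d")
```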
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Network scraping, 阙辉 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Fetch the network parameters
qh_log_write.info('Fetching network parameters!')
qh_wangluo = qh_wl_set.QH_Get_WangLuo_CanShu(qh_o_path, qh_spider_id)
# out
qh_url = qh_wangluo["QH_URL"]                        # spider URL
qh_header = qh_wangluo["QH_Header"]                  # request headers
qh_Params = qh_wangluo["QH_Params"]                  # URL parameters for GET requests
qh_Payload = qh_wangluo["QH_Payload"]                # form parameters for POST requests
qh_filed_name = qh_wangluo["qh_Filed_EN"]            # descriptions of the parsed fields
qh_id_type = qh_wangluo["QH_Id_Type"]                # how the table's row ID is composed (old scheme)
qh_id_type_filed = qh_wangluo["QH_Id_Type_Filed"]    # concrete columns used to compose the row ID (old scheme)
# Instantiate the spider object and run it
qh_PaChong_ject = qh_PaChong.QH_DongFangCaiFu_Spider(qh_url, qh_header, qh_Params, qh_filed_name)
qh_JieGuo_List = qh_PaChong_ject.QH_DongFang_Main()
qh_JieGuo_List_stock = []
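The exact layout of the rows returned by `QH_DongFang_Main()` is defined inside the project's `QH_东方财富网` module, which is not shown here. For orientation, the positions used in the loop below map to fields as follows (reconstructed from the original inline comments; the remaining columns are omitted):

```python
# Index-to-field mapping assumed by the loop below, reconstructed from the
# original comments; this dictionary is illustrative and not used by the script.
QH_ROW_FIELDS = {
    1:  "latest price",                         # 最新价
    11: "stock code",                           # 股票代码
    12: "exchange flag (0 = Shenzhen, 1 = Shanghai)",
    13: "stock name",                           # 股票名称
    18: "total market capitalisation",          # 总市值
    19: "free-float market capitalisation",     # 流通市值
    25: "price/earnings ratio",                 # 市盈率
}
```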

for qh_i,qh_row in enumerate(qh_JieGuo_List):
    # Append the trading date to the raw row
    qh_row.append(qh_now_date)
    # Keep only the fields needed for the data-centre table
    qh_JieGuo_List_stock_row = []
    qh_zjs = qh_row[12]
    if str(qh_zjs) == "1":
        qh_zjs = "上交所"
    elif str(qh_zjs) == "0":
        qh_zjs = "深交所"
    else:
        qh_zjs = "它交所"
    qh_JieGuo_List_stock_row.append(qh_zjs)           # exchange name
    qh_JieGuo_List_stock_row.append(qh_row[12])       # exchange flag: 0 = Shenzhen, 1 = Shanghai
    qh_JieGuo_List_stock_row.append(qh_row[11])       # stock code
    qh_JieGuo_List_stock_row.append(qh_row[13])       # stock name
    qh_JieGuo_List_stock_row.append(qh_row[18])       # total market capitalisation
    qh_JieGuo_List_stock_row.append(qh_row[19])       # free-float market capitalisation
    qh_JieGuo_List_stock_row.append(qh_row[1])        # latest price
    qh_JieGuo_List_stock_row.append(qh_row[25])       # price/earnings ratio

    qh_JieGuo_List_stock.append(qh_JieGuo_List_stock_row)

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Data storage, 阙辉 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
def QH_CunChu_Data(qh_o_path, Qh_ShuJu_Data, qh_cunchu_id, qh_spider_id):
    """
    Store data into the database. Author: 阙辉
    :param qh_o_path:        project root path
    :param Qh_ShuJu_Data:    data to store, as a list of rows
    :param qh_cunchu_id:     storage scheme id
    :param qh_spider_id:     spider id
    :return:
    """
    if qh_cunchu_id == qh_spider_id:
        qh_log_write.info('Storage scheme matches the spider id: {}!'.format(qh_cunchu_id))
    else:
        qh_log_write.info('Storage scheme differs from the spider id: {}!'.format(qh_cunchu_id))
    # Fetch the database parameters
    qh_log_write.info('Fetching database parameters!')
    qh_db_lujing = qh_db_set.QH_Get_DB_CanShu(qh_o_path, qh_cunchu_id)
    # out
    qh_db_path = qh_db_lujing["QH_DB_Path"]                  # database path
    qh_db_name = qh_db_lujing["QH_DB_Name"]                  # database name
    qh_table_name = qh_db_lujing["QH_Table_Name"]            # table name
    qh_filed_set = qh_db_lujing["qh_Filed_Set"]              # column definition list
    qh_filed_value = qh_db_lujing["qh_Filed_Value"]          # column names
    qh_id_type = qh_db_lujing["QH_Id_Type"]                  # how the table's row ID is composed (new scheme)
    qh_id_type_filed = qh_db_lujing["QH_Id_Type_Filed"]      # concrete columns used to compose the row ID (new scheme)
    # Store the data into the data-centre database
    # Instantiate the storage object
    qh_cunshu_db = qh_db_orm.QH_SQLite_DB_CunChu(qh_db_path,
                                                 qh_table_name,
                                                 qh_filed_set,
                                                 qh_filed_value,
                                                 qh_cunchu_id,
                                                 Qh_ShuJu_Data,
                                                 Qh_id_lie_c=qh_id_type_filed,
                                                 Qh_ID_Type=qh_id_type)
    qh_db_star = qh_cunshu_db.QH_CunChu_Data_Main()
    if qh_db_star:
        qh_log_write.info('Database write succeeded!')
    else:
        qh_log_write.info('Database write failed!')
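The `QH_SQLite_DB_CunChu` ORM class lives in the project's own `QH_Sqlite数据库` package and is not reproduced in this post. As a rough, self-contained equivalent of the storage step, a minimal sketch using the standard `sqlite3` module could look like the following; the function name, the TEXT-only column types, and the choice of the first column as the unique key are illustrative assumptions, not the project's actual schema:

```python
import sqlite3

def cunchu_sketch(db_path, table_name, columns, rows):
    """Minimal illustration of storing a list of rows into SQLite.

    Assumption: the first column acts as the unique key, so INSERT OR REPLACE
    keeps a single record per key, mirroring the "one record per stock" scheme.
    """
    conn = sqlite3.connect(db_path)
    try:
        col_defs = ", ".join("{} TEXT".format(c) for c in columns)
        conn.execute("CREATE TABLE IF NOT EXISTS {} ({}, PRIMARY KEY ({}))".format(
            table_name, col_defs, columns[0]))
        placeholders = ", ".join("?" for _ in columns)
        conn.executemany("INSERT OR REPLACE INTO {} ({}) VALUES ({})".format(
            table_name, ", ".join(columns), placeholders), rows)
        conn.commit()
        return True
    except sqlite3.Error:
        return False
    finally:
        conn.close()
```

Called as, for example, `cunchu_sketch('./qh_stock.db', 'qh_stock_list', ['code', 'name', 'price'], rows)`, it returns True on success, which matches how `QH_CunChu_Data` interprets the return value of `QH_CunChu_Data_Main()`.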

# 1. Store the Eastmoney data >>> one record per stock
QH_CunChu_Data(qh_o_path, qh_JieGuo_List, qh_cunchu_id00, qh_spider_id)
# 2. Store the Eastmoney data >>> one record per stock per trading day
QH_CunChu_Data(qh_o_path, qh_JieGuo_List, qh_cunchu_id01, qh_spider_id)
# 3. Store the trimmed rows into the data centre >>> one record per stock
QH_CunChu_Data(qh_o_path, qh_JieGuo_List_stock, qh_cunchu_id02, qh_spider_id)
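The difference between schemes 1 and 2 above comes down to how the row ID is composed from the `QH_Id_Type` / `QH_Id_Type_Filed` settings: keying on the stock code alone keeps one record per stock, while keying on code plus trading date keeps one record per stock per day. A hypothetical sketch of the two key choices (names are placeholders, not the project's configuration):

```python
def make_row_id(stock_code, trade_date, per_day=False):
    # per_day=False -> key on the stock code only (one record per stock)
    # per_day=True  -> key on code + trading date (one record per stock per day)
    # Illustrative only; the real key columns come from QH_Id_Type_Filed.
    return "{}_{}".format(stock_code, trade_date) if per_day else str(stock_code)
```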

### Writing a Python crawler for all stock data on Eastmoney

To cover the required functionality while keeping the code readable and maintainable, below is a fairly complete solution for scraping data for all stocks listed on Eastmoney. Besides the basic page-request and parsing logic, it also includes exception handling and logging. Note that the quote grid on this page is rendered by JavaScript, so a static `requests` + BeautifulSoup fetch may not contain the table; in practice the data often has to come from the page's underlying data interface or a browser-automation tool.

#### Importing libraries

First, import the necessary libraries:

```python
import requests
from bs4 import BeautifulSoup
import pandas as pd
import logging
import time
```

#### Global variables and configuration

Define a few constants for the URL template and other settings:

```python
BASE_URL = "http://quote.eastmoney.com/center/gridlist.html"
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
}
LOG_FILE_NAME = './stock_data_crawler.log'

logging.basicConfig(filename=LOG_FILE_NAME,
                    level=logging.INFO,
                    format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s')
```

#### Helper functions

Write a couple of helpers for loading pages and parsing the HTML:

```python
def fetch_page(url):
    try:
        response = requests.get(url=url, headers=HEADERS)
        if response.status_code != 200:
            raise Exception(f"Failed to load page {url}, status code={response.status_code}")
        return response.text
    except Exception as e:
        logging.error(e)
        return None


def parse_html(html_content):
    soup = BeautifulSoup(html_content, features="html.parser")
    table_node = soup.find('tbody')
    if table_node is None:
        # The quote table is filled in by JavaScript, so a static fetch may not contain it
        logging.warning("No <tbody> found in the page")
        return pd.DataFrame()
    data_list = []
    for row in table_node.find_all('tr'):
        columns = row.find_all('td')
        stock_info = {}
        for idx, col in enumerate(columns[:9]):  # assume the first nine columns carry the useful information
            key_name = ['code', 'name', 'latest_price', 'change_rate', 'open_price',
                        'highest_price', 'lowest_price', 'previous_close', 'volume'][idx]
            stock_info[key_name] = col.get_text(strip=True)
        data_list.append(stock_info)
    return pd.DataFrame(data=data_list)
```

#### Main entry point

Finally, the main loop iterates over the different market boards and saves the results to CSV files:

```python
if __name__ == '__main__':
    markets = [
        {'type': '#hs_a_board', 'file_path': './沪深A.csv'},
        {'type': '#sh_a_board', 'file_path': './上证A.csv'}
    ]

    for market in markets:
        full_url = BASE_URL + market['type']
        html_text = fetch_page(full_url)
        if not html_text:
            continue
        dataframe = parse_html(html_text)
        dataframe.to_csv(market['file_path'], index=False, mode='w+', encoding='utf-8-sig')
        logging.info(f'Successfully crawled and saved the stocks of type [{market["type"]}] into file.')
        time.sleep(3)  # avoid requesting too frequently and getting the IP blocked[^2]
```

With the approach above you can fetch the full stock list and its basic information from Eastmoney. Note that in real use you should follow the target site's terms of service and keep the request interval and volume reasonable so as not to put excessive load on the server[^1].
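As a complement to the fixed `time.sleep(3)` above, one way to keep the request rate polite while tolerating transient failures is a small retry-with-backoff wrapper. This is a sketch added here for illustration; the retry count and delay values are arbitrary, not tuned for Eastmoney:

```python
import logging
import time

import requests

def fetch_with_backoff(url, headers=None, retries=3, base_delay=2.0):
    """Fetch a URL, waiting progressively longer between attempts."""
    for attempt in range(retries):
        try:
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code == 200:
                return response.text
            logging.warning("Attempt %d: status %d for %s", attempt + 1, response.status_code, url)
        except requests.RequestException as exc:
            logging.warning("Attempt %d failed for %s: %s", attempt + 1, url, exc)
        time.sleep(base_delay * (attempt + 1))  # linear backoff before the next try
    return None
```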