用 Python 写的一个批量下载股票年报的小工具

最新推荐文章于 2024-07-09 21:05:07 发布

guofeng80

最新推荐文章于 2024-07-09 21:05:07 发布

阅读量810

点赞数 2

分类专栏： python

国丰

本文链接：https://blog.csdn.net/jingxinguofeng/article/details/118339969

版权

python 专栏收录该内容

14 篇文章 3 订阅

订阅专栏

用 Python 写的一个批量下载股票年报的小工具

from urllib import request
import requests
import os
import openpyxl


# print(os.getcwd())

def getKeyList(file):
    """Collect stock codes from column A of a workbook's active sheet.

    The first row of the sheet's used range is skipped (iteration starts at
    ``min_row + 1``) and empty cells are ignored. For every remaining cell,
    the text after the last ``=`` is kept and all double quotes are removed,
    so a cell holding ``="000938"`` yields ``000938``.

    Args:
        file: Passed straight to ``openpyxl.load_workbook``.

    Returns:
        list: The extracted code strings, sorted in ascending string order.
    """
    wb = openpyxl.load_workbook(file)
    print(wb, type(wb))
    print(wb.sheetnames)

    sheet = wb.active
    maxrow = sheet.max_row
    maxcol = sheet.max_column
    print('该文件', maxrow, '行', maxcol, '列')

    key_list = []
    for row in range(sheet.min_row + 1, maxrow + 1):
        cell_value = sheet[f'A{row}'].value
        if not cell_value:
            continue
        # A cell typed as a number comes back as int/float, which has no
        # .split(); coerce it to text instead of crashing.
        # NOTE(review): leading zeros of a code stored as a number are already
        # lost in the spreadsheet and are not restored here.
        text = cell_value if isinstance(cell_value, str) else str(cell_value)
        key_list.append(text.split("=")[-1].replace('"', ""))
    key_list.sort()
    return key_list


def stock(key, se_date='2018-01-01~2021-06-02'):
    """Download the annual-report PDFs of one stock from cninfo.com.cn.

    The function first looks up the ``orgId`` of ``key`` through the site's
    top-search endpoint, then queries the announcement list (category
    ``category_ndbg_szsh``, first page, up to 30 entries) and saves every
    announcement whose title does not contain "摘要" (summary) as
    ``<secName><key>/<secName><key>--<title>.pdf`` under the current working
    directory. The target folder is created on demand.

    Args:
        key: Stock code as a string; it is concatenated into the query and
            into folder/file names.
        se_date: Date window sent as the ``seDate`` form field, in the form
            ``'YYYY-MM-DD~YYYY-MM-DD'``. Defaults to the originally
            hard-coded range.

    Returns:
        ``False`` when the search finds no matching stock or the query
        reports no records; ``None`` otherwise.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    url1 = f"http://www.cninfo.com.cn/new/information/topSearch/query?keyWord={key}&maxNum=10"
    matches = requests.post(url1, headers=headers).json()
    if not matches:
        # An unknown code yields an empty result; indexing [0] would raise
        # IndexError, so report it and bail out instead.
        print(key, "未查询到对应的股票")
        return False
    orgId = matches[0]["orgId"]

    url = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
    data = {
        'pageNum': '1',
        'pageSize': '30',
        'column': 'szse',
        'tabName': 'fulltext',
        'stock': key + ',' + orgId,
        'category': 'category_ndbg_szsh',
        'seDate': se_date,
        'isHLtitle': 'true',
    }

    dict_data = requests.post(url, data=data, headers=headers).json()
    if dict_data["totalRecordNum"] < 1:
        print(key, "无年报数据")
        print(dict_data)
        return False

    for announcement in dict_data["announcements"]:
        announcement_id = announcement["announcementId"]
        title = announcement["announcementTitle"]
        sec_name = announcement["secName"].replace(" ", "")
        # The folder name is derived from each announcement's own secName,
        # so it is (re)checked on every iteration rather than hoisted.
        folder = sec_name + key

        if not os.path.exists(folder):
            os.mkdir(folder)
            print(folder + '创建成功')

        # Skip report summaries. A substring test also catches a title that
        # starts with "摘要", which the former `find(...) >= 1` check missed.
        if '摘要' in title:
            continue

        url_r = 'http://www.cninfo.com.cn/new/announcement/download?bulletinId=' + announcement_id
        request.urlretrieve(url_r, folder + '/' + folder + '--' + title + '.pdf')
        print(sec_name + title + '保存成功')

if __name__ == '__main__':
    # Interactive mode: download the reports for a single stock code.
    # Surrounding whitespace is stripped so a stray space typed at the prompt
    # does not end up in the query URL or in the folder name.
    key = input('请输入股票代码:').strip()
    stock(key)
    # Batch mode (disabled): the codes can instead be loaded from a spreadsheet.
    # key_list = getKeyList("证劵48.xlsx")
    # print(key_list)

    # for i in key_list:
    #     stock(i)