"""A small Python tool for batch-downloading the annual reports of listed stocks."""
from urllib import request
import requests
import os
import openpyxl
# print(os.getcwd())
def getKeyList(file):
    """Read stock codes from column A of a workbook's active sheet.

    The sheet's first used row is skipped (presumably a header row -- confirm
    against the spreadsheet layout). Every remaining non-empty cell in column
    A is converted to text, the part after the last ``=`` is kept and double
    quotes are removed, so a cell such as ``="000938"`` yields ``000938``.

    Args:
        file: Workbook location, passed straight to
            ``openpyxl.load_workbook``.

    Returns:
        list: The extracted code strings, sorted in ascending order.
    """
    wb = openpyxl.load_workbook(file)
    print(wb, type(wb))
    print(wb.sheetnames)
    sheet = wb.active
    first_row = sheet.min_row
    last_row = sheet.max_row
    print('该文件', last_row, '行', sheet.max_column, '列')

    key_list = []
    for row in range(first_row + 1, last_row + 1):
        value = sheet['A' + str(row)].value
        if not value:
            # Empty cell: nothing to extract.
            continue
        # str() guards against cells stored as numbers rather than text,
        # which would otherwise fail on .split().
        key_list.append(str(value).split("=")[-1].replace('"', ""))
    key_list.sort()
    return key_list
def stock(key):
    """Download the annual-report PDFs of one stock from cninfo.com.cn.

    Steps:
      1. Resolve the ``orgId`` for ``key`` through the site's top-search
         endpoint (first search hit is used).
      2. Query the announcement list for category ``category_ndbg_szsh``
         between 2018-01-01 and 2021-06-02 (first page, 30 entries).
      3. For every announcement, make sure a directory named
         ``<secName><key>`` exists in the current working directory, then
         save the PDF into it unless the title contains '摘要' (summary).

    Args:
        key: Stock code as a string, e.g. ``"000938"``.

    Returns:
        ``False`` when the code cannot be resolved or no announcement is
        found; ``None`` after the download loop finishes.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    # Without a timeout a stalled connection would block the tool forever.
    timeout = 30
    # Characters that are illegal in Windows file names or act as path
    # separators; security names such as "*ST..." would otherwise break mkdir.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    search_url = 'http://www.cninfo.com.cn/new/information/topSearch/query'
    # Passing the keyword via ``params`` lets requests URL-encode it.
    matches = requests.post(
        search_url,
        params={'keyWord': key, 'maxNum': '10'},
        headers=headers,
        timeout=timeout,
    ).json()
    if not matches:
        # An unknown code returns an empty result; indexing [0] would raise.
        print(key, "未找到对应的股票")
        return False
    orgId = matches[0]["orgId"]

    url = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
    data = {
        'pageNum': '1',
        'pageSize': '30',
        'column': 'szse',
        'tabName': 'fulltext',
        'stock': key + ',' + orgId,
        'category': 'category_ndbg_szsh',
        'seDate': '2018-01-01~2021-06-02',
        'isHLtitle': 'true',
    }
    dict_data = requests.post(url, data=data, headers=headers, timeout=timeout).json()
    if dict_data["totalRecordNum"] < 1:
        print(key, "无年报数据")
        print(dict_data)
        return False

    for value_dict in dict_data["announcements"] or []:
        announcement_id = value_dict["announcementId"]
        name = value_dict["announcementTitle"]
        wjjname = value_dict["secName"].replace(" ", "")

        # The directory is derived per announcement because secName comes
        # from each record rather than from the request.
        folder = (wjjname + key).translate(unsafe_chars)
        if not os.path.exists(folder):
            os.mkdir(folder)
            print(folder + '创建成功')

        # Skip report summaries; ``in`` also catches titles that *start*
        # with the marker, which ``find(...) >= 1`` missed.
        if '摘要' in name:
            continue

        url_r = 'http://www.cninfo.com.cn/new/announcement/download?bulletinId=' + announcement_id
        file_name = (wjjname + key + '--' + name + '.pdf').translate(unsafe_chars)
        request.urlretrieve(url_r, os.path.join(folder, file_name))
        print(wjjname + name + '保存成功')
if __name__ == '__main__':
    # Interactive mode: ask for a single stock code and download its reports.
    # Surrounding whitespace is stripped so that it does not end up in the
    # request payload or in the output directory name.
    key = input('请输入股票代码:').strip()
    if key:
        stock(key)
    else:
        print('未输入股票代码')
    # Batch mode alternative: read the codes from a spreadsheet instead.
    # key_list = getKeyList("证劵48.xlsx")
    # print(key_list)
    # for i in key_list:
    #     stock(i)