import requests
import string
import os.path
import pandas
import re
import time
# Shared HTTP request headers, defined at module level so that every
# function in this file can reuse the same dict.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/107.0.0.0 Safari/537.36"
    ),
}
# 1. Look up the org_id for a security; `code` is the security code.
def get_org_id(code):
    """Return the ``orgId`` of the first search hit for *code*, or ``None``.

    Posts *code* as ``keyWord`` (with ``maxNum`` set to ``'10'``) to the
    cninfo ``topSearch/query`` endpoint and reads ``orgId`` from the first
    element of the JSON response.

    Args:
        code: Security code used as the search keyword.

    Returns:
        The ``orgId`` value of the first result, or ``None`` when the
        response contains no results.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the first result has no ``orgId`` key.
    """
    org_id_url = 'http://www.cninfo.com.cn/new/information/topSearch/query'
    org_id_data = {
        'keyWord': code,
        'maxNum': '10',
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(
        url=org_id_url,
        data=org_id_data,
        headers=headers,
        timeout=30,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    matches = response.json()

    # Only the first hit is used; an empty result set yields None.
    first_match = next(iter(matches), None)
    if first_match is None:
        return None
    return first_match['orgId']
# 2. Resolve `plate` (Shenzhen 'sz', Shanghai 'sh', or Beijing 'bj;third')
#    and `colum` (Shenzhen 'szse', Shanghai 'sse', or Beijing 'bj') for a
#    security, together with its short name and listing date.
def get_plate_colum_compy_short_listing(code):
    """Fetch market identifiers, short name and listing date for *code*.

    Requests the cninfo ``getCompanyIntroduction`` endpoint for *code* and
    reads the ``MARKET``, ``ASECNAME`` and ``F006D`` fields from the first
    ``basicInformation`` entry of the first record.

    Args:
        code: Security code, interpolated into the ``scode`` query parameter.

    Returns:
        A tuple ``(plate, colum, compy_short, listing_date)``. ``plate`` and
        ``colum`` are both ``None`` when the reported market is not one of
        the three recognised exchanges; a message is printed in that case.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError: If the response lacks the expected structure.

    Side effects:
        Rebinds the module-level globals ``plate`` and ``colum`` on every
        call.
    """
    # The globals are kept for backward compatibility: code elsewhere in the
    # file may read `plate` / `colum` directly instead of the return value.
    global colum, plate

    # f-string: {code} is substituted into the URL.
    plate_url = f'http://www.cninfo.com.cn/data20/companyOverview/getCompanyIntroduction?scode={code}'
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url=plate_url, headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    # Navigate once to the record that holds all three fields.
    basic_info = response.json()['data']['records'][0]['basicInformation'][0]
    plate_market = basic_info['MARKET']
    compy_short = basic_info['ASECNAME']
    listing_date = basic_info['F006D']
    # NOTE(review): the original author's commented-out code also read
    # 'F004V' as an address for grouping into folders; it is not used here.

    if plate_market == '上交所':
        plate, colum = 'sh', 'sse'
    elif str(plate_market).startswith('深交所'):
        # Prefix match: the Shenzhen value may carry a suffix after these
        # three characters.
        plate, colum = 'sz', 'szse'
    elif plate_market == '北交所':
        plate, colum = 'bj;third', 'bj'
    else:
        print(str(code)+'的所属市场非上非深非北,是:'+str(plate_market))
        # Reset explicitly so that the values left over from a previously
        # processed security are never returned for this one.
        plate = colum = None

    return plate, colum, compy_short, listing_date
# 3.定义get_pdf_url_dict项目字典的函数,
def get_pdf_url_dict(code, org_id, plate, colum):
pdf_url_dict = {}
page = 1
while True:
fild_list_data = {
'stock': '{},{}'.format(code, org_id), # code.org_id
'tabName': 'fulltext',
'pageSize': &
巨潮资讯网爬取年报(存在错误)
于 2022-11-30 12:48:37 首次发布