毕设部分代码(测试):处理专利数据+经纬度计算距离+插值法构建利率曲线,后续补充

# https://zhuanlan.zhihu.com/p/365897117
# https://blog.csdn.net/weixin_50646402/article/details/130780623
# https://blog.csdn.net/jtj2002/article/details/131263607

# Count patent applications per firm (Stkcd) and year (Accper).
#
# The character at index 4 of the application number `AppliNo` is read as the
# patent type: '1' is counted as an invention patent (GPfaming) and '2' as a
# utility-model patent (GPshiyong). GPtotal is the sum of those two counts.
# NOTE(review): assumes the type digit really sits at index 4 of every AppliNo
# (a country prefix such as "CN" would shift it) - confirm against the data.
#
# Fixes relative to the previous nested-loop version:
#   * the type character was compared with the ints 1 / 2, which never matches
#     a string character, so every count stayed at 0;
#   * the per-year subset was computed but never used, so each row counted
#     all applications of the firm instead of that year's applications;
#   * the year loop ran over non-unique Accper values, emitting duplicate rows;
#   * the debug counter `a` / `if a==1: break` could never trigger and the
#     per-application prints flooded the output - both removed;
#   * repeated pd.concat inside the loop is replaced by a single groupby.
patent_type = jian_zhuanlidata['AppliNo'].astype(str).str[4]

# One 0/1 indicator column per patent type, aligned with the firm/year keys.
type_flags = pd.DataFrame({
    'Stkcd': jian_zhuanlidata['Stkcd'],
    'Accper': jian_zhuanlidata['Accper'],
    'GPfaming': (patent_type == '1').astype(int),
    'GPshiyong': (patent_type == '2').astype(int),
})

# sort=False keeps firm/year pairs in order of first appearance in the data.
result = type_flags.groupby(['Stkcd', 'Accper'], sort=False, as_index=False)[
    ['GPfaming', 'GPshiyong']
].sum()
result['GPtotal'] = result['GPfaming'] + result['GPshiyong']

# Same column order as the original result frame.
result = result[['Stkcd', 'Accper', 'GPtotal', 'GPfaming', 'GPshiyong']]
result = result.reset_index(drop=True)
print(result)


# for code in jian_zhaunlidata['Stkcd'].unique():
#     format_jian_zhaunlidata = jian_zhaunlidata[jian_zhaunlidata['Stkcd'] == code]
#     for nianfen in format_jian_zhaunlidata['Accper']:
#         nianfen_format_jian_zhaunlidata = format_jian_zhaunlidata[format_jian_zhaunlidata['Accper'] == nianfen]
#         nianfen_format_jian_zhaunlidata['GPtotal'] = len(nianfen_format_jian_zhaunlidata.index)
#         nianfen_format_jian_zhaunlidata['GPfaming'] = len(nianfen_format_jian_zhaunlidata[nianfen_format_jian_zhaunlidata['AppliNo'] // 10000 % 10 == 1].index)
#         nianfen_format_jian_zhaunlidata['GPshiyong'] = len(nianfen_format_jian_zhaunlidata[nianfen_format_jian_zhaunlidata['AppliNo'] // 10000 % 10 == 2].index)
#         result = pd.concat([result, nianfen_format_jian_zhaunlidata], ignore_index=True)

# 输出结果
# print(result.head())

经纬度查询距离:

pip install cpca
from geopy.distance import geodesic

# geodesic() takes two points; each tuple here is written as
# (latitude, longitude) in decimal degrees.
coord_1 = (39.917978, 116.396288)  # Tiananmen, Beijing
coord_2 = (31.230416, 121.473701)  # downtown Shanghai

# Geodesic distance between the two points, expressed in kilometres.
distance = geodesic(coord_1, coord_2).kilometers

# Report the distance with two decimals.
print("距离为:{:.2f}千米".format(distance))

pip install geopy

pip install chinese_province_city_area_mapper
# CPCATransformer was used below without ever being imported (NameError);
# bring it into scope together with the other package imports.
from chinese_province_city_area_mapper.transformer import CPCATransformer
from chinese_province_city_area_mapper import myumap
from chinese_province_city_area_mapper.infrastructure import SuperMap

# NOTE(review): `location_str` is not defined in this snippet; it is presumably
# a list of address strings prepared earlier - confirm before running.

# Option 1: pass a custom {district: city} mapping directly to CPCATransformer.
cpca = CPCATransformer({"朝阳区":"北京市"})
df = cpca.transform(location_str)

# Option 2: use the default address dictionary shipped in the `myumap` module.
cpca = CPCATransformer(myumap.umap)
df = cpca.transform(location_str)

# District -> city lookup table (dict: key = district name, value = the city it
# belongs to). Note that it contains duplicated district names.
SuperMap.area_city_mapper
# List of duplicated district names; for a district in this list the
# area_city_mapper entry is not reliable.
SuperMap.rep_areas
# City -> province lookup table (dict: key = city name, value = province name).
SuperMap.city_province_mapper
# Coordinates of every province/city/district (dict: key = "province,city,district",
# value = (latitude, longitude)).
SuperMap.lat_lon_mapper
# Look up the coordinates of Chaoyang District, Beijing.
SuperMap.lat_lon_mapper.get("北京市,北京市,朝阳区")

插值法构建整个区间内的利率曲线:

from scipy.interpolate import CubicSpline
import numpy as np
import pandas as pd  # used for the output frame; was missing from this snippet


def _tenor_to_years(tenor):
    """Convert a tenor label such as '3M' or '10Y' into a maturity in years.

    Raises ValueError for an unknown unit suffix instead of silently skipping
    the tenor, which would leave the knot list shorter than the rate list.
    """
    unit = tenor[-1]
    amount = int(tenor[:-1])
    if unit == 'M':
        return amount / 12
    if unit == 'Y':
        return amount
    raise ValueError("unsupported tenor unit in {!r}".format(tenor))


tenors = ['1M', '3M', '6M', '9M', '1Y', '2Y', '3Y', '5Y', '7Y', '10Y', '15Y', '20Y', '30Y', '40Y', '50Y']
# ChinaBond spot (zero-coupon) yields; source: Eastmoney - interest-rate
# trends - bond database - spot yields.
# NOTE(review): values look like percentages - confirm the unit.
zeroRates = [1.7394,2.1815,2.2711,2.2586,2.2743,2.3635,2.4480,2.5514,2.7146,2.7103,2.8371,2.8882,3.0855,3.2349,3.2833]

# Knot positions of the curve, expressed in years.
tenors_in_years = [_tenor_to_years(tenor) for tenor in tenors]

# Cubic-spline interpolation of the spot rates between the quoted tenors.
cs = CubicSpline(tenors_in_years, zeroRates)

# Monthly grid from 1 month up to the longest quoted tenor. An integer month
# index divided by 12 is used instead of np.arange with a float step, whose
# length is not guaranteed because of floating-point rounding.
n_months = int(round(12 * max(tenors_in_years)))
months = range(1, n_months + 1)
spotRates = cs(np.array(months) / 12)

# One "<k>M" label per grid point, so labels and rates always have equal length.
sequence = ["{}M".format(month) for month in months]

# Result table: one row per month with the interpolated spot rate.
df = pd.DataFrame({'term': sequence, 'ytm': spotRates})
df

 

-- Scratch maintenance statements for ks_crawler1.k_cbt_daily_report.
-- NOTE(review): if these are run top to bottom, the DELETE and SELECT below
-- reference bondCode after the ALTER TABLE has already dropped that column,
-- so they would fail. Confirm the intended order before running; all of these
-- statements except the final SELECT are destructive.

-- Drop the date_time and bondCode columns from the table.
ALTER TABLE ks_crawler1.k_cbt_daily_report
DROP COLUMN date_time,
DROP COLUMN bondCode;
-- Delete every row whose bondCode is not NULL.
DELETE FROM ks_crawler1.k_cbt_daily_report WHERE bondCode IS NOT NULL;
-- Count the rows that still have a non-NULL bondCode.
SELECT  count(*)  FROM ks_crawler1.k_cbt_daily_report WHERE bondCode IS NOT NULL;
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值