一元线性回归分析实例: 以沪深300指数基金净值为例
基金净值数据格式(CSV 文件, 命名为 <基金代码>.csv 并放在脚本运行的当前目录下; date 为日期, jz 为基金净值):
date,jz,ljjz
2019-01-02,1.0194,1.0194
2019-01-03,1.0177,1.0177
linear_mod_1.py
# coding=utf-8
import os, sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
# Simple (one-variable) linear regression example: fit a straight trend line
# to a fund's net-value series read from <fcode>.csv and plot both.
#
# Usage: python linear_mod_1.py fcode
# Exit codes: 1 = wrong argument count, 2 = fund code is not 6 characters,
#             3 = data file missing, 4 = no rows left after the date filter.

# Exactly one command-line argument is expected: the fund code.
if len(sys.argv) == 2:
    fcode = sys.argv[1]
else:
    print('usage: python linear_mod_1.py fcode ')
    sys.exit(1)

# Only the length of the fund code is validated, not its characters.
if len(fcode) != 6:
    print(' fcode is char(6)')
    sys.exit(2)

# The data file is looked up in the current working directory.
file1 = "./" + fcode + '.csv'
if not os.path.exists(file1):
    print(file1 + ' is not exists.')
    sys.exit(3)

# Read the CSV with pandas and keep only rows dated after 2019-01-01.
# NOTE(review): this compares 'date' against a string literal; it relies on
# the column holding ISO 'YYYY-MM-DD' text as in the sample data - confirm.
df = pd.read_csv(file1)
df = df[df['date'] > '2019-01-01']

# Fitting a regression on zero samples fails inside scikit-learn with an
# unrelated-looking error, so stop early with an explicit message instead.
if df.empty:
    print(file1 + ' has no rows after 2019-01-01.')
    sys.exit(4)

y = df['jz'].values  # fund net value
# The independent variable is the observation index 0..n-1, not the date,
# so the fitted slope is "net-value change per row".
x = np.arange(0, len(y), 1)

# Reshape both series into single-column 2-D arrays, the layout the
# estimator is given here (one feature, one target).
x = x.reshape(-1, 1)
y = y.reshape(-1, 1)

# Build and fit the regression model.
model = LinearRegression()
model.fit(x, y)

# Fitted values for the same x positions (the trend line).
predict_y = model.predict(x)

# Collect and print the fitted parameters.
predictions = {
    'intercept': model.intercept_,    # intercept
    'coefficient': model.coef_,       # regression coefficient (slope)
}
print(predictions)

# Plot the raw net-value curve together with the fitted line.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y, '-', label='jz')                   # fund net value
ax.plot(x, predict_y, 'r--.', label='predict')   # fitted straight line
ax.legend(loc='upper left')
plt.title('predict fund net value: ' + fcode)
plt.xlabel('x')
plt.ylabel('jz')
plt.show()
运行示例: python linear_mod_1.py 660008 (需要当前目录下存在 660008.csv), 输出如下:
{'intercept': array([1.16350283]), 'coefficient': array([[0.00094415]])}
参考: https://blog.csdn.net/sinat_25873421/article/details/80791531