数据处理可谓重中之重:无论是衡量数据的好坏,还是确定后续各项工作的可行性,都离不开它。今天就让我们一起学习如何实现对简单数据的各项基本操作吧!
1.平均数
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
# Load the sample data and pull out the 'X' and 'Y' columns.
z = pd.read_csv(r'D:\excel\p027.csv')
x = z['X']
y = z['Y']
# x and y are already module-level names here, so no `global` declaration is
# needed; declaring `global x,y` after assigning them is a SyntaxError.
x_mean = np.mean(x)  # arithmetic mean of x via numpy's mean function
y_mean = np.mean(y)
print("x的平均值为",x_mean)
print("y的平均值为",y_mean)
2.协方差 相关系数
法1:
# Covariance of x and y: np.cov returns a 2x2 matrix, and the off-diagonal
# entry [0, 1] is cov(x, y).
covxy = np.cov(x, y)[0, 1]
# Standard deviations of x and y, computed with ddof=1.
stdx, stdy = (np.std(series, ddof=1) for series in (x, y))
# Correlation coefficient: covariance divided by the product of the two
# standard deviations.
corxy = covxy/(stdx*stdy)
print("x的标准差为", stdx)
print("y的标准差为", stdy)
print("x和y的协方差为", covxy)
print("x和y的相关系数为", corxy)
法2:
3.β0和β1的值
def parameter(x,y):
    """Estimate the simple linear regression coefficients by least squares.

    Fits the line ``y = beta0 + beta1 * x`` using
    ``beta1 = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean) ** 2)`` and
    ``beta0 = y_mean - beta1 * x_mean``.

    Args:
        x: One-dimensional sequence of predictor values (list, numpy array,
            pandas Series, ...).
        y: One-dimensional sequence of response values, same length as ``x``.

    Returns:
        Tuple ``(beta1, beta0)``: the slope first, then the intercept.

    Raises:
        ValueError: If ``x`` and ``y`` differ in shape, are empty, or if all
            ``x`` values are equal (the slope is then undefined).
    """
    # Convert to plain float arrays so elements are taken by position; indexing
    # a pandas Series with x[i] is label-based and fails on a non-default index.
    x_values = np.asarray(x, dtype=float)
    y_values = np.asarray(y, dtype=float)
    if x_values.shape != y_values.shape:
        raise ValueError("x and y must have the same length")
    if x_values.size == 0:
        raise ValueError("x and y must not be empty")

    x_mean = x_values.mean()
    y_mean = y_values.mean()
    x_deviation = x_values - x_mean
    sum_xdeviation = np.sum(x_deviation ** 2)
    if sum_xdeviation == 0:
        raise ValueError("x has zero variance; the slope is undefined")
    sum_xydeviation = np.sum(x_deviation * (y_values - y_mean))

    beta1 = sum_xydeviation / sum_xdeviation
    beta0 = y_mean - x_mean * beta1
    return beta1, beta0
# Fit once and unpack the (slope, intercept) pair, instead of running the
# whole regression twice just to index each element of the result.
beta1, beta0 = parameter(x, y)
print('β1='+str(beta1))
print('β0='+str(beta0))
4.回归参数的置信区间
5.t检验
6.可决系数
# Coefficient of determination, computed as 1 - SSE/SST. The printed label is
# 'R=', but the value is the coefficient of determination (R squared), not
# the correlation coefficient.
# NOTE(review): `n` and `SSE` are not defined in the visible part of this
# file, so SSE is recomputed here from the least-squares fit to make this
# section runnable on its own. Confirm it matches any earlier definition.
x_obs = np.asarray(x, dtype=float)
y_obs = np.asarray(y, dtype=float)
# Total sum of squares: squared deviations of y from its mean.
SST = np.sum((y_obs - np.mean(y_obs)) ** 2)
# Residual sum of squares: squared deviations of y from the fitted line.
slope, intercept = parameter(x, y)
SSE = np.sum((y_obs - (intercept + slope * x_obs)) ** 2)
R = 1-SSE/SST
print('R=',R)