作业题目如下:
第一题:
代码如下:
import numpy as np
import scipy.stats
import pandas
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
def get_var(X):
    """Return the unbiased sample variance of the values in X.

    The arithmetic mean is computed first; the squared deviations from it
    are then summed and divided by ``n - 1`` rather than ``n``.

    Args:
        X: An iterable of numbers (list, tuple, NumPy array, generator, ...).

    Returns:
        The sample variance of the values in X.

    Raises:
        ValueError: If X holds fewer than two values, because the ``n - 1``
            denominator would then be zero or negative.
    """
    values = list(X)  # materialise once so one-shot iterables also work
    length = len(values)
    if length < 2:
        raise ValueError("get_var requires at least two data points")
    average = sum(values) / length
    squared_deviations = sum((x - average) ** 2 for x in values)
    return squared_deviations / (length - 1)  # the denominator is n - 1
# Question 1
ans = sns.load_dataset("anscombe")  # data set required by the assignment

# Collect the x and y values of every sub-dataset. Rows are grouped by their
# "dataset" label so that each group contributes all of its points; the
# earlier positional slice [i*11 : i*11 + 10] stepped through the rows 11 at
# a time but kept only 10 of them, dropping the last point of each group.
# sort=False keeps the groups in the order in which they appear in the table.
Xarray = []
Yarray = []
for _, group in ans.groupby("dataset", sort=False):
    Xarray.append(group["x"].values)
    Yarray.append(group["y"].values)

# Mean and sample variance of x for each sub-dataset.
for i, sub_x in enumerate(Xarray):
    mean = np.mean(sub_x)
    print(f"the mean of the {i} x is {mean!s}")
    variance = get_var(sub_x)
    print(f"the variance of the {i} x is {variance!s}")

# Mean and sample variance of y for each sub-dataset.
for i, sub_y in enumerate(Yarray):
    mean = np.mean(sub_y)
    print(f"the mean of the {i} y is {mean!s}")
    variance = get_var(sub_y)
    print(f"the variance of the {i} y is {variance!s}")

# Pearson correlation coefficient between x and y for each sub-dataset
# (element 0 of the pearsonr result is the coefficient itself).
for i, (sub_x, sub_y) in enumerate(zip(Xarray, Yarray)):
    coefficient = scipy.stats.pearsonr(sub_x, sub_y)[0]
    print(f"the coefficient of {i} is {coefficient!s}")

# Ordinary least squares fit y = slope * x + intercept for each sub-dataset.
# add_constant prepends the intercept column, so params[0] is the intercept
# and params[1] is the slope.
for i, (sub_x, sub_y) in enumerate(zip(Xarray, Yarray)):
    model = sm.OLS(sub_y, sm.add_constant(sub_x))
    coefficient2 = model.fit().params
    print(f"the fuction of {i} is : y = {coefficient2[1]!s}x+{coefficient2[0]!s}")
运行结果如下:
第二题代码如下:
# Question 2: draw one scatter plot of y against x for each value of the
# "dataset" column. FacetGrid.map returns the grid, so `pic` is still the grid.
pic = sns.FacetGrid(ans, col="dataset").map(plt.scatter, "x", "y")
plt.show()
运行结果如图: