第十四周作业

题目:

代码:

import random
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels.api as sm    
import statsmodels.formula.api as smf    
import statistics as sta     
import scipy.stats as stats 

# Summary statistics, correlation, least-squares fit and scatter plots for
# the four datasets bundled in seaborn's "anscombe" example data.
anscombe = sns.load_dataset("anscombe")

# Dataset labels, in the order the results are reported.
# Assumes the `dataset` column uses these labels (as seaborn's bundled
# anscombe data does); the plot below facets on the same column.
DATASET_NAMES = ('I', 'II', 'III', 'IV')

# Select each dataset by its label rather than by hard-coded row slices, so
# the analysis does not silently depend on the row order of the frame.
xs = {}
ys = {}
for name in DATASET_NAMES:
    rows = anscombe[anscombe['dataset'] == name]
    xs[name] = rows['x'].values
    ys[name] = rows['y'].values

# Mean and sample variance of x, then of y, one line per dataset.
for name in DATASET_NAMES:
    print('the mean of x of dataset {}: {}'.format(name, np.mean(xs[name])))

print('')

for name in DATASET_NAMES:
    print('the variance of x of dataset {}: {}'.format(
        name, sta.variance(xs[name])))

print('')

for name in DATASET_NAMES:
    print('the mean of y of dataset {}: {}'.format(name, np.mean(ys[name])))

print('')

for name in DATASET_NAMES:
    print('the variance of y of dataset {}: {}'.format(
        name, sta.variance(ys[name])))

print('')

# pearsonr returns (coefficient, p-value); only the coefficient is reported.
for name in DATASET_NAMES:
    coefficient = stats.pearsonr(xs[name], ys[name])[0]
    print('the correlation coefficient of dataset {}: {}'.format(
        name, coefficient))

print('')

# Ordinary least squares fit of y on x. add_constant prepends the intercept
# column, so params[0] is the intercept and params[1] is the slope.
for name in DATASET_NAMES:
    design = sm.add_constant(xs[name])
    fit = sm.OLS(ys[name], design).fit()
    print('the linear regression line of dataset {}:'.format(name))
    print('y = {} + {} * x'.format(fit.params[0], fit.params[1]))

# One scatter panel per dataset. `height` replaces the `size` keyword, which
# seaborn renamed in 0.9 and no longer accepts in current releases.
sns.set(style='whitegrid')
g = sns.FacetGrid(anscombe, col='dataset', hue='dataset', height=3)
g.map(plt.scatter, 'x', 'y')
plt.show()

结果:


参考资料:

求相关系数:https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html

求线性拟合:http://www.statsmodels.org/devel/generated/statsmodels.regression.linear_model.OLS.html


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值