本文链接：https://blog.csdn.net/goldendog/article/details/80658887

题目：

代码：

import random
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels.api as sm    
import statsmodels.formula.api as smf    
import statistics as sta     
import scipy.stats as stats 

# Load Anscombe's quartet from seaborn's sample datasets. The frame is used
# below through its 'dataset', 'x' and 'y' columns.
anscombe = sns.load_dataset("anscombe")

# Select each dataset's x values by its label in the 'dataset' column instead
# of by hard-coded row offsets, so the result does not depend on the rows
# being stored in I, II, III, IV order with exactly 11 rows each.
ansxI = anscombe.loc[anscombe.dataset == 'I', 'x'].values
meanIx = np.mean(ansxI)
print('the mean of x of dataset I: {}'.format(meanIx))
ansxII = anscombe.loc[anscombe.dataset == 'II', 'x'].values
meanIIx = np.mean(ansxII)
print('the mean of x of dataset II: {}'.format(meanIIx))
ansxIII = anscombe.loc[anscombe.dataset == 'III', 'x'].values
meanIIIx = np.mean(ansxIII)
print('the mean of x of dataset III: {}'.format(meanIIIx))
ansxIV = anscombe.loc[anscombe.dataset == 'IV', 'x'].values
meanIVx = np.mean(ansxIV)
print('the mean of x of dataset IV: {}'.format(meanIVx))

print('')

# Sample variance of the x values of each dataset, computed with the
# standard-library `statistics.variance` (imported as `sta`).
varIx, varIIx, varIIIx, varIVx = (
    sta.variance(xs) for xs in (ansxI, ansxII, ansxIII, ansxIV)
)

# Report the four variances in dataset order.
for message, value in (
    ('the variance of x of dataset I: {}', varIx),
    ('the variance of x of dataset II: {}', varIIx),
    ('the variance of x of dataset III: {}', varIIIx),
    ('the variance of x of dataset IV: {}', varIVx),
):
    print(message.format(value))

print('')

# Select each dataset's y values by its label in the 'dataset' column instead
# of by hard-coded row offsets, so the result does not depend on the rows
# being stored in I, II, III, IV order with exactly 11 rows each.
ansyI = anscombe.loc[anscombe.dataset == 'I', 'y'].values
meanIy = np.mean(ansyI)
print('the mean of y of dataset I: {}'.format(meanIy))
ansyII = anscombe.loc[anscombe.dataset == 'II', 'y'].values
meanIIy = np.mean(ansyII)
print('the mean of y of dataset II: {}'.format(meanIIy))
ansyIII = anscombe.loc[anscombe.dataset == 'III', 'y'].values
meanIIIy = np.mean(ansyIII)
print('the mean of y of dataset III: {}'.format(meanIIIy))
ansyIV = anscombe.loc[anscombe.dataset == 'IV', 'y'].values
meanIVy = np.mean(ansyIV)
print('the mean of y of dataset IV: {}'.format(meanIVy))

print('')

# Sample variance of the y values of each dataset, computed with the
# standard-library `statistics.variance` (imported as `sta`).
varIy, varIIy, varIIIy, varIVy = (
    sta.variance(ys) for ys in (ansyI, ansyII, ansyIII, ansyIV)
)

# Report the four variances in dataset order.
for message, value in (
    ('the variance of y of dataset I: {}', varIy),
    ('the variance of y of dataset II: {}', varIIy),
    ('the variance of y of dataset III: {}', varIIIy),
    ('the variance of y of dataset IV: {}', varIVy),
):
    print(message.format(value))

print('')

# Pearson correlation between x and y for each dataset. `pearsonr` returns
# the coefficient first and the p-value second; only the coefficient is kept.
cofI, cofII, cofIII, cofIV = (
    stats.pearsonr(xs, ys)[0]
    for xs, ys in (
        (ansxI, ansyI),
        (ansxII, ansyII),
        (ansxIII, ansyIII),
        (ansxIV, ansyIV),
    )
)

# Report the four coefficients in dataset order.
for message, value in (
    ('the correlation coefficient of dataset I: {}', cofI),
    ('the correlation coefficient of dataset II: {}', cofII),
    ('the correlation coefficient of dataset III: {}', cofIII),
    ('the correlation coefficient of dataset IV: {}', cofIV),
):
    print(message.format(value))

print('')

# Ordinary least squares fit of y on x for each dataset.
# Step 1: build the design matrices; `add_constant` adds the intercept column.
xI, xII, xIII, xIV = (
    sm.add_constant(xs) for xs in (ansxI, ansxII, ansxIII, ansxIV)
)

# Step 2: pair every response vector with its design matrix.
modI, modII, modIII, modIV = (
    sm.OLS(ys, design)
    for ys, design in zip(
        (ansyI, ansyII, ansyIII, ansyIV),
        (xI, xII, xIII, xIV),
    )
)

# Step 3: estimate the coefficients.
resI, resII, resIII, resIV = (
    model.fit() for model in (modI, modII, modIII, modIV)
)

# Step 4: print each fitted line; params[0] is printed as the intercept and
# params[1] as the slope.
for heading, fitted in (
    ('the linear regression line of dataset I:', resI),
    ('the linear regression line of dataset II:', resII),
    ('the linear regression line of dataset III:', resIII),
    ('the linear regression line of dataset IV:', resIV),
):
    intercept, slope = fitted.params[0], fitted.params[1]
    print(heading)
    print('y = {} + {} * x'.format(intercept, slope))

# Scatter plot of y against x, one panel (and one colour) per dataset.
sns.set(style='whitegrid')
# `height` is the current name of FacetGrid's per-facet size argument; the
# former `size` keyword was renamed in seaborn 0.9 and is no longer accepted
# by current releases.
g = sns.FacetGrid(anscombe, col='dataset', hue='dataset', height=3)
g.map(plt.scatter, 'x', 'y')
plt.show()

结果：