# Part 1
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Summary statistics of x and y, computed separately for each dataset.
# The grouped x/y selection is built once and reused for every statistic.
_xy_by_dataset = anascombe.groupby(['dataset'])[['x', 'y']]

# Mean of x and y within each dataset.
print('The mean of x and y:')
print(_xy_by_dataset.mean())

# Variance of x and y within each dataset.
print('\nThe varience of x and y:')
print(_xy_by_dataset.var())

# Correlation matrix between x and y within each dataset.
print('\nThe correlation coefficient between x and y:')
print(_xy_by_dataset.corr())
# Fit an ordinary least squares line (y ~ x) to each dataset separately
# and print the regression summary for each fit.
datasets = ['I', 'II', 'III', 'IV']
for dataset in datasets:
    # Keep only the rows that belong to the current dataset before fitting.
    lin_model = smf.ols('y ~ x', anascombe[anascombe['dataset'] == dataset]).fit()
    print('\nThe linear model for dataset', dataset)
    print(lin_model.summary())
    # Blank separator after each summary so consecutive reports do not run together.
    print('\n')
# Part 2
import matplotlib.pyplot as plt
import seaborn as sns
# Build a grid with one panel per value of the 'dataset' column, then draw a
# scatter of y against x in each panel. hue="y" makes seaborn group (and
# colour) the points by their y value.
g = sns.FacetGrid(data=anascombe, col='dataset', hue="y")
g.map(plt.scatter, 'x', 'y')