# Part 1
# 1.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa
# Print the mean and variance of the x and y columns for each value of the
# 'dataset' column in anscombe.csv.
anc = pd.read_csv('anscombe.csv')
# Group once and reuse the grouping for every statistic.
by_dataset = anc.groupby('dataset')
# Each statistic is printed exactly once (the original repeated every line).
print(f"The mean of x is\n{by_dataset['x'].mean()}")
print(f"The mean of y is\n{by_dataset['y'].mean()}")
# .var() uses pandas' default degrees of freedom (ddof=1, sample variance).
print(f"The variance of x is\n{by_dataset['x'].var()}")
print(f"The variance of y is\n{by_dataset['y'].var()}")
# 2.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa
# Print the correlation matrix of the numeric columns for each of the four
# datasets (I-IV) in anscombe.csv.
anc = pd.read_csv('anscombe.csv')
for ordinal, label in (
    ('first', 'I'),
    ('second', 'II'),
    ('third', 'III'),
    ('fourth', 'IV'),
):
    subset = anc[anc['dataset'] == label]
    # Drop non-numeric columns (the string 'dataset' label) before calling
    # corr(): since pandas 2.0, corr() no longer discards them silently and
    # raises ValueError when it meets a string column.
    corr_matrix = subset.select_dtypes(include='number').corr()
    print(f'The correlation coefficient of the {ordinal} dataset is \n{corr_matrix}')
# 3.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa
# Fit a separate linear regression (formula 'y ~ x') to each of the four
# datasets in anscombe.csv and print each fitted model's summary.
anc = pd.read_csv('anscombe.csv')
# One fitted OLS result per dataset label, in the order I, II, III, IV.
md = [
    sfa.ols('y ~ x', anc[anc['dataset'] == label]).fit()
    for label in ('I', 'II', 'III', 'IV')
]
for model in md:
    # Blank lines separate consecutive summary tables.
    print('\n')
    print(model.summary())
# Part 2
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa
# Draw a scatter plot of y against x for each dataset in anscombe.csv,
# one facet per dataset.
anc = pd.read_csv('anscombe.csv')
# col='dataset' places each distinct 'dataset' value in its own column facet.
vsl = sns.FacetGrid(anc, col='dataset')
# Plot and show once; the original repeated both calls, redrawing the same
# points on the same grid.
vsl.map(plt.scatter, 'x', 'y')
plt.show()