In [1]:
# 载入此项目所需要的库
import numpy as np
import pandas as pd
import sys
import sklearn
import visuals as vs # Supplementary code
# Check the Python version: this notebook targets Python 2.7 exactly
# (it relies on Python 2 print statements further down).
from sys import version_info
# Compare (major, minor) as a pair so that every interpreter other than 2.7 is
# rejected. Testing the two fields separately with `and` only raises when BOTH
# differ, which lets versions such as 2.6 or 3.7 slip through unnoticed.
if tuple(version_info[:2]) != (2, 7):
    # The message reads: "Please use Python 2.7 to complete this project".
    raise Exception('请使用Python 2.7来完成此项目')
# Show results inline in the notebook
%matplotlib inline
In [2]:
# Load the Boston housing dataset
data = pd.read_csv('housing.csv')
# Features are every column except 'MEDV'; prices are the 'MEDV' column itself
features = data.drop('MEDV', axis=1)
prices = data['MEDV']
# Success: report the number of rows and columns that were loaded
print("Boston housing dataset has {} data points with {} variables each.".format(data.shape[0], data.shape[1]))
In [3]:
# Locate the data file, load it, and preview its first five rows
in_file = 'housing.csv'
full_data = pd.read_csv(in_file)
display(full_data.head(5))
In [4]:
# Pull out the 'MEDV' column that the statistics below are computed on,
# and preview its first five values
outcomes_MEDV = full_data['MEDV']
display(outcomes_MEDV.head(5))
In [5]:
# TODO 1
# Descriptive statistics of the 'MEDV' values held in outcomes_MEDV.

# Goal: minimum value
minimum_price = outcomes_MEDV.min()
# Goal: maximum value
maximum_price = outcomes_MEDV.max()
# Goal: arithmetic mean
mean_price = outcomes_MEDV.mean()
# Goal: median
median_price = outcomes_MEDV.median()
# Goal: standard deviation
# NOTE: pandas' Series.std() defaults to ddof=1 (sample standard deviation),
# so this differs slightly from numpy's np.std(), which defaults to ddof=0.
std_price = outcomes_MEDV.std()

# Goal: print the computed results
print("Statistics for Boston housing dataset:\n")
print("Minimum price: ${:,.2f}".format(minimum_price))
print("Maximum price: ${:,.2f}".format(maximum_price))
print("Mean price: ${:,.2f}".format(mean_price))
print("Median price ${:,.2f}".format(median_price))
print("Standard deviation of prices: ${:,.2f}".format(std_price))