input
# Load the housing training data into a pandas DataFrame
import pandas as pd
# Path of the CSV file to read (relative to the current working directory)
iowa_file_path = '../input/home-data-for-ml-course/train.csv'
home_data = pd.read_csv(iowa_file_path)
# Set up code checking: bind the checker to this notebook's globals first,
# then star-import the exercise module (presumably the source of the
# step_N checker objects used in later cells -- confirm against learntools)
from learntools.core import binder
binder.bind(globals())
from learntools.machine_learning.ex3 import *
print("Setup Complete")
output
Setup Complete
input
# Prediction target: the SalePrice column of the loaded data
y = home_data['SalePrice']
# Show the target values as a quick sanity check
print(y)
# Check your answer
step_1.check()
output
0 208500
1 181500
2 223500
3 140000
4 250000
...
1455 175000
1456 210000
1457 266500
1458 142125
1459 147500
Name: SalePrice, Length: 1460, dtype: int64
Correct
input
# Columns used as model inputs
feature_names = [
    'LotArea',
    'YearBuilt',
    '1stFlrSF',
    '2ndFlrSF',
    'FullBath',
    'BedroomAbvGr',
    'TotRmsAbvGrd',
]
# Alias kept so both names refer to the same list object
home_features = feature_names
# Feature matrix: only the columns listed in feature_names
X = home_data[feature_names]
# Check your answer
step_2.check()
output
Correct
input
# Review the feature data before modelling
# Summary statistics for each column of X
print(X.describe())
# First five rows of X
print(X.head(n=5))
output
LotArea YearBuilt 1stFlrSF 2ndFlrSF FullBath \
count 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000
mean 10516.828082 1971.267808 1162.626712 346.992466 1.565068
std 9981.264932 30.202904 386.587738 436.528436 0.550916
min 1300.000000 1872.000000 334.000000 0.000000 0.000000
25% 7553.500000 1954.000000 882.000000 0.000000 1.000000
50% 9478.500000 1973.000000 1087.000000 0.000000 2.000000
75% 11601.500000 2000.000000 1391.250000 728.000000 2.000000
max 215245.000000 2010.000000 4692.000000 2065.000000 3.000000
BedroomAbvGr TotRmsAbvGrd
count 1460.000000 1460.000000
mean 2.866438 6.517808
std 0.815778 1.625393
min 0.000000 2.000000
25% 2.000000 5.000000
50% 3.000000 6.000000
75% 3.000000 7.000000
max 8.000000 14.000000
LotArea YearBuilt 1stFlrSF 2ndFlrSF FullBath BedroomAbvGr \
0 8450 2003 856 854 2 3
1 9600 1976 1262 0 2 3
2 11250 2001 920 866 2 3
3 9550 1915 961 756 1 3
4 14260 2000 1145 1053 2 4
TotRmsAbvGrd
0 8
1 6
2 6
3 7
4 9
input
from sklearn.tree import DecisionTreeRegressor
# Specify and fit the model in one step; fit() returns the estimator itself.
# random_state is pinned to a fixed number so repeated runs build the same tree.
iowa_model = DecisionTreeRegressor(random_state=1).fit(X, y)
# Check your answer
step_3.check()
output
Correct
input
# Predict with the fitted model on X, the same feature matrix passed to fit()
# earlier in this file, so these are in-sample predictions rather than an
# estimate of accuracy on unseen data
predictions = iowa_model.predict(X)
print(predictions)
# Check your answer
step_4.check()
output
[208500. 181500. 223500. ... 266500. 142125. 147500.]
Correct