# Create a copy
# Load the data:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Location of the housing CSV on the local machine.
path = 'D:/python机器学习/数据/housing.csv'

# Read through an explicit file handle (as the original script did), but let
# the context manager close it as soon as pandas has finished parsing.
with open(path) as f:
    housing = pd.read_csv(f)

# Bucket median_income into five labelled categories; the categories are used
# below as the stratification target for the train/test split.
housing['income_cat'] = pd.cut(
    housing['median_income'],
    bins=[0., 1.5, 3.0, 4.5, 6., np.inf],
    labels=[1, 2, 3, 4, 5],
)

from sklearn.model_selection import StratifiedShuffleSplit

# Single 80/20 split with a fixed seed so the result is reproducible.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, test_index in split.split(housing, housing['income_cat']):
    # NOTE(review): the splitter yields positional indices; .loc matches them
    # only while `housing` keeps the default integer index — confirm.
    strat_train_set = housing.loc[train_index]
    strat_test_set = housing.loc[test_index]

# Inspect the training set.
strat_train_set.info()

# Work on a copy of the training set so later changes to `housing` do not
# modify strat_train_set.
housing = strat_train_set.copy()
# Visualize the data
kind='scatter'
housing.plot(kind='scatter',x='longitude