# 第1关:使用scikit-learn导入数据集
from sklearn import datasets
def getIrisData():
    '''
    Load the Iris dataset and return a small preview of it.

    Returns:
        X - the first 5 rows of the feature data
        y - the class labels of the first 5 samples
        X_shape - shape of the full two-dimensional feature array
    '''
    bunch = datasets.load_iris()
    features, labels = bunch.data, bunch.target
    preview_rows = 5
    # The shape is taken from the full feature array, not from the preview.
    return features[:preview_rows], labels[:preview_rows], features.shape
# 第2关:数据预处理 — 标准化
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing
'''
Data description:
The data contains 20,640 observations on 9 variables.
This dataset contains the average house value as target variable
and the following input variables (features): average income,
housing average age, average rooms, average bedrooms, population,
average occupation, latitude, and longitude in that order.
dataset : dict-like object with the following attributes:
dataset.data : ndarray, shape [20640, 8]
Each row corresponding to the 8 feature values in order.
dataset.target : numpy array of shape (20640,)
Each value corresponds to the average house value in units of 100,000.
dataset.feature_names : array of length 8
Array of ordered feature names used in the dataset.
dataset.DESCR : string
Description of the California housing dataset.
'''
# Load the California housing data, using ./step4/ as the data directory.
# data_home is passed by keyword because the parameters of
# fetch_california_housing are keyword-only in current scikit-learn releases;
# passing the path positionally raises TypeError there.
dataset = fetch_california_housing(data_home="./step4/")
X_full, y = dataset.data, dataset.target
# Keep only two of the features: columns 0 and 5 (average income and
# average occupation, per the feature order described above).
X = X_full[:, [0, 5]]
def getMinMaxScalerValue():
    '''
    Rescale the module-level feature data X with MinMaxScaler and return
    the first 5 rows of the transformed data.

    Returns:
        X_first5 - first 5 rows of the rescaled data
    '''
    scaler = MinMaxScaler()
    # Fit on the full feature data, then keep only the leading rows.
    rescaled = scaler.fit_transform(X)
    return rescaled[:5]
def getScaleValue():
'''
对目标数