#2021.10.13 HIT ATCI LZH
#数据集为boston房价与周边环境等因素,参照网上的例子
import tensorflow as tf
from tensorflow.python.ops import init_ops
from tensorflow.python.training import optimizer
import numpy as np
import matplotlib.pyplot as plt
#加载boston房价数据集
boston = tf.contrib.learn.datasets.load_dataset('boston')
X_train, Y_train = boston.data, boston.target
#对数据集的格式进行解析
print('数据加载成功!')
print('boston.type is',type(boston))
print('X_train.type =', type(X_train))
print('X_train.ndim =',X_train.ndim)
print('X_train.shape =',X_train.shape)
print('X_train.dtype =',X_train.dtype)
print('X_train 的行数 m = {0}, 列数 n = {1}'.format(X_train.shape[0],X_train.shape[1]))
print('Y_train.type =', type(Y_train))
print('Y_train.ndim =',Y_train.ndim)
print('Y_train.shape =',Y_train.shape)
print('Y_train 的行数 m = {0}, 列数 n = None'.format(Y_train.shape[0]))
print('Y_train.dtype =',Y_train.dtype)
boston数据集是我们在机器学习中经常用到的, 上面代码是我对它的数据类型以及数据集的大小进行了解析,结果如下:
数据加载成功!
boston.type is <class 'tensorflow.contrib.learn.python.learn.datasets.base.Dataset'>
X_train.type = <class 'numpy.ndarray'>
X_train.ndim = 2
X_train.shape = (506, 13)
X_train.dtype = float64
X_train 的行数 m = 506, 列数 n = 13
Y_train.type = <class 'numpy.ndarray'>
Y_train.ndim = 1
Y_train.shape = (506,)
Y_train 的行数 m = 506, 列数 n = None
Y_train.dtype = float64
可以看到X_trian和Y_train都是ndarray类型。
打开下载好的数据集:
Variables in order:
CRIM per capita crime rate by town
ZN proportion of residential land zoned for lots over 25,000 sq.ft.
INDUS proportion of non-retail business acres per town
CHAS Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
NOX nitric oxides concentration (parts per 10 million)
RM average number of rooms per dwelling
AGE proportion of owner-occupied units built prior to 1940
DIS weighted distances to five Boston employment centres
RAD index of accessibility to radial highways
TAX full-value property-tax rate per $10,000
PTRATIO pupil-teacher ratio by town
B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
LSTAT % lower status of the population
MEDV Median value of owner-occupied homes in $1000's
0.00632 18.00 2.310 0 0.5380 6.5750 65.20 4.0900 1 296.0 15.30
396.90 4.98 24.00
0.02731 0.00 7.070 0 0.4690 6.4210 78.90 4.9671 2 242.0 17.80
396.90 9.14 21.60
0.02729 0.00 7.070 0 0.4690 7.1850 61.10 4.9671 2 242.0 17.80
392.83 4.03 34.70
0.03237 0.00 2.180 0 0.4580 6.9980 45.80 6.0622 3 222.0 18.70
394.63 2.94 33.40
0.06905 0.00 2.180 0 0.4580 7.1470 54.20 6.0622 3 222.0 18.70
396.90 5.33 36.20
0.02985 0.00 2.180 0 0.4580 6.4300 58.70 6.0622 3 222.0 18.70
394.12 5.21 28.70
0.08829 12.50 7.870 0 0.5240 6.0120 66.60 5.5605 5 311.0 15.20
395.60 12.43 22.90
0.14455 12.50 7.870 0 0.5240 6.1720 96.10 5.9505 5 311.0 15.20
396.90 19.15 27.10
0.21124 12.50 7.870 0 0.5240 5.6310 100.00 6.0821 5 311.0 15.20
386.63 29.93 16.50
0.17004 12.50 7.870 0 0.5240 6.0040 85.90 6.5921 5 311.0 15.20
386.71 17.10 18.90
0.22489 12.50 7.870 0 0.5240 6.3770 94.30 6.3467 5 311.0 15.20
392.52 20.45 15.00
0.11747 12.50 7.870 0 0.5240 6.0090 82.90 6.2267 5 311.0 15.20
396.90 13.27 18.90
0.09378 12.50 7.870 0 0.5240 5.8890 39.00 5.4509 5 311.0 15.20
390.50 15.71 21.70
0.62976 0.00 8.140 0 0.5380 5.9490 61.80 4.7075 4 307.0 21.00
396.90 8.26 20.40
由此我们也能看到数据集中的数据都是float64