"""Weighted k-nearest-neighbour regression on the UCI housing data.

This part of the script downloads the data set, min-max scales the selected
feature columns, makes a random 80/20 train/test split and builds, with the
TensorFlow 1.x graph API, the L1 distance matrix and the neighbour weights
that the prediction step consumes.
"""
import matplotlib.pyplot as plt
import numpy as np
import requests
import tensorflow as tf

house_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
housing_header = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                  'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
col_used = ['CRIM', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'TAX', 'PTRATIO',
            'B', 'LSTAT']
num_features = len(col_used)

# Fail fast on a hung connection or an HTTP error instead of trying to parse
# an error page as numbers.
response = requests.get(house_url, timeout=30)
response.raise_for_status()

# One list of floats per non-blank line; fields are whitespace separated.
housing_data = [
    [float(field) for field in line.split()]
    for line in response.text.splitlines()
    if line.strip()
]
data = np.array(housing_data)

# Resolve column positions from the header rather than hard-coding them.
target_col = housing_header.index('MEDV')
feature_cols = [i for i, name in enumerate(housing_header) if name in col_used]

y_vals = data[:, [target_col]]  # shape (n_samples, 1)
x_vals = data[:, feature_cols]  # shape (n_samples, num_features)

# Min-max scale every feature column to [0, 1].  np.ptp is used because the
# ndarray.ptp method no longer exists in NumPy 2.x.
x_vals = (x_vals - x_vals.min(axis=0)) / np.ptp(x_vals, axis=0)

# Random split: 80% of the rows for training, the remainder for testing.
num_samples = len(x_vals)
train_indices = np.random.choice(
    num_samples, int(round(num_samples * 0.8)), replace=False)
test_indices = np.setdiff1d(np.arange(num_samples), train_indices)

# Fancy indexing below requires x_vals / y_vals to be NumPy arrays.
x_vals_train = x_vals[train_indices]
# The misspelled name is kept on purpose: the evaluation code further down
# refers to it as `y_vals_trian`.
y_vals_trian = y_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_test = y_vals[test_indices]

k = 4
batch_size = len(x_vals_test)

# Initialize placeholders
x_data_train = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
x_data_test = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
y_data_train = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y_data_test = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# L1 distance between every test row and every training row.  Broadcasting
# (n_train, f) against (n_test, 1, f) gives (n_test, n_train, f); summing the
# feature axis leaves a (n_test, n_train) distance matrix.
distance = tf.reduce_sum(
    tf.abs(tf.subtract(x_data_train, tf.expand_dims(x_data_test, 1))),
    axis=2)

# top_k on the negated distances selects the k smallest distances per test
# row; both results have shape (n_test, k).
top_k_xvals, top_k_indices = tf.nn.top_k(tf.negative(distance), k=k)

# Per-row sum of the k selected values, kept as a column (n_test, 1) so it
# broadcasts across the k neighbours in the division below.
x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1), 1)

# Normalised neighbour weights, expanded to (n_test, 1, k) so they can be
# batch-multiplied with the (n_test, k, 1) neighbour targets.
# NOTE(review): top_k_xvals holds negated distances, so each weight equals
# d_i / sum(d) and the farthest of the k neighbours gets the largest weight;
# inverse-distance weighting may have been intended -- confirm.  The weights
# are also undefined (0 / 0) when all k distances are zero.
x_vals_weight = tf.expand_dims(top_k_xvals / x_sums, 1)

sess = tf.Session()
# Target values of the k nearest training rows for every test row:
# gathering a (n_train, 1) tensor with (n_test, k) indices gives (n_test, k, 1).
top_k_yvals = tf.gather(y_data_train, top_k_indices)

# Weighted sum of the neighbour targets: (n_test, 1, k) x (n_test, k, 1) gives
# (n_test, 1, 1); dropping axis 1 leaves (n_test, 1), matching y_data_test.
prediction = tf.squeeze(tf.matmul(x_vals_weight, top_k_yvals), axis=[1])

# Mean squared error over the rows actually fed, so a final batch that is
# smaller than batch_size is still averaged over its own size.
mse = tf.reduce_mean(tf.square(tf.subtract(prediction, y_data_test)))

num_test = len(x_vals_test)
all_predictions = []
for batch_num, min_index in enumerate(range(0, num_test, batch_size), start=1):
    max_index = min(min_index + batch_size, num_test)
    x_batch = x_vals_test[min_index:max_index]
    y_batch = y_vals_test[min_index:max_index]
    # Fetch both tensors in a single run so the distance graph is evaluated
    # once per batch rather than twice.
    predictions, batch_mse = sess.run(
        [prediction, mse],
        feed_dict={x_data_train: x_vals_train,
                   x_data_test: x_batch,
                   y_data_train: y_vals_trian,
                   y_data_test: y_batch})
    all_predictions.append(predictions)
    print('Batch #' + str(batch_num) + ' MSE: ' + str(np.round(batch_mse, 3)))

# Histogram of predicted vs. actual values over the whole test set, not just
# the last batch.
predictions = np.concatenate(all_predictions)
bins = np.linspace(5, 50, 101)
plt.hist(predictions, bins, alpha=0.5, label='Prediciton ')
plt.hist(y_vals_test, bins, alpha=0.5, label='Actual')
plt.title('Histogram of Predicted and Actual Value')
plt.xlabel('home value')
plt.ylabel('Frequency')
plt.legend(loc='lower right')
plt.show()