02 python 学习


这里主要讲解 argsort 排序函数:它返回的是把数据排好序时各元素在原数据中的位置索引,而不是排序后的值。
来源于:numpy.argsort
pandas 基于 numpy 构建,对应的方法是:pandas.Series.argsort


from sklearn.model_selection import train_test_split

# Keep 80% of the samples for training; the fixed seed makes the split
# repeatable.
# NOTE(review): the original note says every returned object is a
# pandas Series, but x_test is indexed as a 2-D array further down, so it is
# presumably a DataFrame -- confirm against the definitions of x and y.
(x_train, x_test,
 y_train, y_test) = train_test_split(x, y, train_size=0.8, random_state=1)

# Positions that put y_test in sorted order.
order = y_test.argsort(axis=0)
print('type of order:', type(order))
print('content of order:', order)
print('type of y_test: ', type(y_test))
print('y test before sort:', y_test)

# Reorder the targets through their underlying array.
y_test = y_test.values[order]
print('y test after sort:', y_test)

# Apply the same row order to the features; x_test is a matrix, hence the
# explicit column slice.
x_test = x_test.values[order, :]


import matplotlib.pyplot as plt
from math import sqrt
from matplotlib import pyplot
import pandas as pd
from numpy import concatenate
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.models import Sequential
# Public import path for the layers (instead of the internal ``layers.core``).
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import tensorflow

# Neural-network regression model implemented with Keras.

# --- Load the data ---------------------------------------------------------
path = 'data001.csv'
train = pd.read_csv(path)
# NOTE(review): read_csv already uses the first line of the file as the
# header, so this slice skips the first *data* row -- confirm it is intended.
dataset = train.iloc[1:, :]

# --- Keep numeric data only ------------------------------------------------
# MinMaxScaler raises "could not convert string to float" as soon as one cell
# holds text (for example a header line repeated inside the file).  Turn every
# cell into a number; anything that cannot be parsed becomes NaN.
target_column = dataset.columns[-1]
dataset = dataset.apply(pd.to_numeric, errors='coerce')
# Drop columns that contain no number at all (pure string fields).
dataset = dataset.dropna(axis=1, how='all')
if target_column not in dataset.columns:
    raise ValueError(
        'target column %r contains no numeric values' % (target_column,))
# Drop rows that still hold a non-numeric or missing cell.
dataset = dataset.dropna(axis=0, how='any')
if dataset.empty:
    raise ValueError('no fully numeric rows left in %r' % (path,))

# DataFrame -> ndarray
values = dataset.values

# Scale every column to [0, 1] to speed up convergence.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
y = scaled[:, -1]    # last column is the regression target
X = scaled[:, 0:-1]  # all other columns are features

# --- Random train / test split ---------------------------------------------
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3)

# --- Fully connected network -----------------------------------------------
n_features = X.shape[1]
model = Sequential()
# Hidden layer, 128 units.
model.add(Dense(128, input_shape=(n_features,)))
model.add(Activation('relu'))
# A Dropout layer can be enabled here to reduce overfitting:
# model.add(Dropout(0.2))
# Hidden layer, 128 units.
model.add(Dense(128))
model.add(Activation('relu'))
# model.add(Dropout(0.2))
# No activation on the output layer: this is a regression problem and the
# value should be predicted directly.
model.add(Dense(1))
# Mean squared error loss, optimised with Adam.
model.compile(loss='mean_squared_error', optimizer=Adam())

# --- Training with early stopping ------------------------------------------
early_stopping = EarlyStopping(monitor='val_loss', patience=50, verbose=2)
# NOTE(review): the test split is also used as the validation data that
# drives early stopping, so the test error reported later is optimistic.
history = model.fit(train_X, train_y, epochs=300,
                    batch_size=20, validation_data=(test_X, test_y),
                    verbose=2, shuffle=False, callbacks=[early_stopping])

# --- Loss curves -----------------------------------------------------------
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()

# --- Prediction ------------------------------------------------------------
yhat = model.predict(test_X)

# Undo the scaling of the predictions.  The scaler was fitted on
# [features..., target], so the features are put back in front of the
# predicted target before inverting, and the last column is read out.
inv_yhat0 = concatenate((test_X, yhat), axis=1)
inv_yhat1 = scaler.inverse_transform(inv_yhat0)
inv_yhat = inv_yhat1[:, -1]

# Undo the scaling of the true targets the same way.
test_y = test_y.reshape((len(test_y), 1))
inv_y0 = concatenate((test_X, test_y), axis=1)
inv_y1 = scaler.inverse_transform(inv_y0)
inv_y = inv_y1[:, -1]
# 计算 RMSE
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)
plt.plot(inv_y)
plt.plot(inv_yhat)
plt.show()
```

报错是:

```
Traceback (most recent call last):
  File "F:/SSD/CNN.py", line 24, in <module>
    scaled = scaler.fit_transform(values)
  File "D:\anaconda\lib\site-packages\sklearn\base.py", line 464, in fit_transform
    return self.fit(X, **fit_params).transform(X)
  File "D:\anaconda\lib\site-packages\sklearn\preprocessing\data.py", line 334, in fit
    return self.partial_fit(X, y)
  File "D:\anaconda\lib\site-packages\sklearn\preprocessing\data.py", line 362, in partial_fit
    force_all_finite="allow-nan")
  File "D:\anaconda\lib\site-packages\sklearn\utils\validation.py", line 527, in check_array
    array = np.asarray(array, dtype=dtype, order=order)
  File "D:\anaconda\lib\site-packages\numpy\core\numeric.py", line 538, in asarray
    return array(a, dtype, copy=False, order=order)
ValueError: could not convert string to float: 'label'
```

label是csv文件里的列名,但是就算去掉,还是会报这个错误。
©️2020 CSDN 皮肤主题: 编程工作室 设计师:CSDN官方博客 返回首页