python深度学习笔记第三章

Table of Contents

3.1神经网络

3.4电影评论分类,二分类问题

3.5新闻分类:多分类问题

3.6预测房价,回归问题

总结:


3.1神经网络

3.4电影评论分类,二分类问题

# coding=utf-8
"""
__project_ = 'Python深度学习'
__file_name__ = '3.4电影评论分类'
__author__ = 'WIN10'
__time__ = '2020/4/11 12:02'
__product_name = PyCharm

"""

from keras.datasets import imdb
import numpy as np
from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics
import matplotlib.pyplot as plt
# Load the data, keeping only the 10,000 most frequent words.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

print(train_data[0])
print(train_labels[0])

# Decode the first review back into English words.
# Word indices are offset by 3 because 0, 1 and 2 are reserved for
# "padding", "start of sequence" and "unknown".
# (Fixed typo: `reverse_world_index` -> `reverse_word_index`.)
word_index = imdb.get_word_index()
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
print(decoded_review)

# Prepare the data: multi-hot encode the integer word-index sequences.
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a list of integer-index sequences.

    Returns a float array of shape (len(sequences), dimension) with a 1.0
    at every column whose index appears in the corresponding sequence.
    (Fixed: the original loop variable shadowed the `sequences` parameter.)
    """
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # Fancy indexing sets all listed positions of row i at once.
        results[i, sequence] = 1
    return results
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
print(x_train.shape)

# Labels become float vectors so they match the sigmoid output.
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')

# Hold out the first 10,000 samples as a validation set.
x_val = x_train[:10000]
partial_x_train = x_train[10000:]

y_val = y_train[:10000]
partial_y_train = y_train[10000:]

# Build the network.
# NOTE: the original code rebound the imported `models` module to the model
# instance (`models = models.Sequential()`); renamed the instance to `model`
# so the module name stays usable.
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# Compile needs three things: an optimizer, a loss function, and the metrics
# to monitor. String shortcuts ('rmsprop', 'binary_crossentropy', 'accuracy')
# work too; passing objects allows configuration, as done here.
# NOTE(review): newer Keras spells the argument `learning_rate`; `lr` is kept
# for compatibility with the Keras version this was written for.
model.compile(optimizer=optimizers.RMSprop(lr=0.001),
              loss=losses.binary_crossentropy,
              metrics=[metrics.binary_accuracy])

# Train the model, monitoring on the held-out validation data.
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))

# Evaluate on the test set.
results = model.evaluate(x_test, y_test)
print(results)

# Plot training and validation loss per epoch.
history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']

epochs = range(1, len(loss_values) + 1)
plt.plot(epochs, loss_values, 'bo', label='Training loss')
plt.plot(epochs, val_loss_values, 'b', label='Validation loss')
plt.title('loss')
plt.xlabel('Epochs')
plt.ylabel('loss')
plt.legend()

plt.show()

3.5新闻分类:多分类问题

# coding=utf-8
"""
__project_ = 'Python深度学习'
__file_name__ = '3.5新闻分类'
__author__ = 'WIN10'
__time__ = '2020/4/11 13:16'
__product_name = PyCharm

"""

from keras.datasets import  reuters
import  numpy as np
from keras import models
from keras import layers

# Load the Reuters newswire data, keeping the 10,000 most frequent words.
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)

# Prepare the data: multi-hot encode the integer word-index sequences.
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a list of integer-index sequences.

    Returns a float array of shape (len(sequences), dimension) with a 1.0
    at every column whose index appears in the corresponding sequence.
    (Fixed: the original loop variable shadowed the `sequences` parameter.)
    """
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # Fancy indexing sets all listed positions of row i at once.
        results[i, sequence] = 1
    return results
# Multi-hot encode the train and test newswires.
x_train=vectorize_sequences(train_data)
x_test=vectorize_sequences(test_data)
def to_one_hot(labels, dimension=46):
    """One-hot encode integer class labels.

    Returns a float array of shape (len(labels), dimension) with a 1.0 in
    column `labels[i]` of row i. Replaces the original per-label Python
    loop with a single vectorized fancy-indexing assignment.
    """
    results = np.zeros((len(labels), dimension))
    # Row i gets a 1 at column labels[i], all rows set in one C-level pass.
    results[np.arange(len(labels)), labels] = 1
    return results
# Option 1: one-hot encode the labels; pairs with loss 'categorical_crossentropy'.
# one_hot_train_labels=to_one_hot(train_labels)
# one_hot_test_labels=to_one_hot(test_labels)
# Option 2 (used here): keep labels as an integer tensor; pairs with
# loss 'sparse_categorical_crossentropy'.
y_train=np.array(train_labels)
y_test=np.array(test_labels)
# Hold out the first 1,000 samples as a validation set.
x_val=x_train[:1000]
partial_x_train=x_train[1000:]

# y_val=one_hot_train_labels[:1000]
# partial_y_train=one_hot_train_labels[1000:]
y_val=y_train[:1000]
partial_y_train=y_train[1000:]
print(y_val)
# Build the network. 64 hidden units: the 46-way output needs wider layers
# than the binary example so information is not bottlenecked.
# NOTE: the original code rebound the imported `models` module to the model
# instance (`models = models.Sequential()`); renamed the instance to `model`
# so the module name stays usable.
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(46, activation='softmax'))

# Compile. With one-hot labels use 'categorical_crossentropy'; with integer
# labels (used here) use 'sparse_categorical_crossentropy'.
model.compile(optimizer='rmsprop',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train, monitoring on the held-out validation data.
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))

# Evaluate on the test set: [loss, accuracy].
results = model.evaluate(x_test, y_test)
print(results)

3.6预测房价,回归问题

# coding=utf-8
"""
__project_ = 'Python深度学习'
__file_name__ = '预测房价'
__author__ = 'WIN10'
__time__ = '2020/4/11 13:53'
__product_name = PyCharm

"""
from keras.datasets import boston_housing
from keras import models
from keras import layers
import numpy as np
# Load the data.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

# Standardize each feature to zero mean and unit variance.
mean=train_data.mean(axis=0)
train_data-=mean
std=train_data.std(axis=0)
train_data/=std

# The test set is normalized with statistics computed on the TRAINING data —
# normalization parameters must never be computed from the test set.
test_data-=mean
test_data/=std

# Network factory — mse: mean squared error, mae: mean absolute error.
def build_model():
    """Return a fresh, compiled regression network.

    Two 64-unit relu layers followed by a single linear output unit
    (no activation: scalar regression). Compiled with MSE loss and MAE
    as the monitored metric. Input width follows `train_data`.
    """
    net = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])
    net.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return net

# K-fold cross-validation: with so little data a single validation split
# would be noisy, so the score is averaged over k folds.
k = 4
num_val_samples = len(train_data) // k
num_epochs = 500
all_scores = []
# Renamed from the original misspelled `all_mea_histories`.
all_mae_histories = []
for i in range(k):
    # Fold i is the validation slice; everything else is training data.
    val_data = train_data[i * num_val_samples:(i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples:(i + 1) * num_val_samples]

    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples], train_data[(i + 1) * num_val_samples:]], axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples], train_targets[(i + 1) * num_val_samples:]], axis=0)

    model = build_model()
    # verbose=0 silences the per-epoch progress bar (verbose=1 shows it).
    history = model.fit(partial_train_data, partial_train_targets,
                        epochs=num_epochs, batch_size=1, verbose=0,
                        validation_data=(val_data, val_targets))
    # val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    # all_scores.append(val_mae)

    # The history key was renamed across Keras versions: standalone Keras
    # used 'val_mean_absolute_error', tf.keras uses 'val_mae' — accept either.
    mae_key = ('val_mean_absolute_error'
               if 'val_mean_absolute_error' in history.history else 'val_mae')
    mae_history = history.history[mae_key]
    all_mae_histories.append(mae_history)

print(all_mae_histories)

总结:

回归问题使用损失函数是均方误差(MSE)

回归指标是平均绝对误差(MAE)

如果输入数据的特征具有不同的取值范围,应该预处理,缩放特征。

如果可用数据很少,可以采用K折验证

如果训练数据少,最好使用隐藏层少的小型网络,以避免过拟合。

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值