TensorFlow Deep Learning: Basic Image Classification

import tensorflow as tf
import pandas as pd
import numpy as np
import glob

# 1. Read the image data
# key, value = read(file_queue)
# key: the file name (looked up in labels.csv for the target value); value: the contents of one sample
def read_pic():
    # 1. Build the file name queue
    file_names = glob.glob('./data/GenPics/*.jpg')
    file_queue = tf.train.string_input_producer(file_names)
    # 2. Read and decode
    reader = tf.WholeFileReader()
    filename, image = reader.read(file_queue)
    # Decode the raw JPEG bytes
    decoded_image = tf.image.decode_jpeg(image)
    # Set the static shape so the image dimensions are known downstream
    decoded_image.set_shape([20, 80, 3])
    # Cast the image to float32
    cast_image = tf.cast(decoded_image, tf.float32)
    # 3. Batch the samples
    filename_batch, image_batch = tf.train.batch([filename, cast_image], batch_size=100, num_threads=1, capacity=200)
    return filename_batch, image_batch

# 2. Parse the CSV file and build a table mapping file names to label values
def parse_csv():
    # Read the label file
    csv_data = pd.read_csv('./data/GenPics/labels.csv', names=['file_num', 'chars'], index_col='file_num')
    # Convert each letter into its position in the alphabet
    # e.g. NZPP -> [13, 25, 15, 15]
    labels = []
    for label in csv_data['chars']:
        letter = []
        for word in label:
            # 'A' maps to 0, 'B' to 1, ...; ord() returns the character's ASCII code
            letter.append(ord(word) - ord('A'))
        labels.append(letter)

    csv_data['labels'] = labels  # store the numeric labels in a new column

    return csv_data
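
For a quick sanity check, the table built by parse_csv() looks roughly like the sketch below when run on its own (the file numbers and letters are made-up examples; the actual rows depend on the generated labels.csv):

# Illustration only: inspect the mapping built by parse_csv().
csv_data = parse_csv()
print(csv_data.head())
#           chars            labels
# file_num
# 0          NZPP  [13, 25, 15, 15]
# 1          WKHK   [22, 10, 7, 10]   <- example values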

# 3. Pair each sample's feature values with its target values
# by looking the file name up in the table (csv_data)
def filename2label(filenames, csv_data):
    labels = []

    for file_name in filenames:
        # Keep only the digits of the file name; they are the row index into the CSV
        file_num = "".join(list(filter(str.isdigit, str(file_name))))
        # Look up the target value for this sample
        target_labels = csv_data.loc[int(file_num), 'labels']
        labels.append(target_labels)
    return np.array(labels)
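
The file names coming out of tf.train.batch are byte strings, so str() turns something like b'./data/GenPics/3.jpg' into a string whose only digits are the file number. A minimal sketch of the lookup (the file name is made up for illustration):

# Illustration only: how one batched file name maps to its CSV row.
import numpy as np

fake_filenames = np.array([b'./data/GenPics/3.jpg'])   # shape/dtype as returned by tf.train.batch
file_num = "".join(filter(str.isdigit, str(fake_filenames[0])))
print(file_num)   # '3'
# csv_data.loc[3, 'labels'] would then return that row's label list, e.g. [13, 25, 15, 15]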

# 4. Build the convolutional neural network model ==> produces y_predict
def create_weights(shape):
    # Create a weight/bias variable; stddev is the standard deviation of the initializer
    return tf.Variable(initial_value=tf.random_normal(shape=shape, stddev=0.01))

def create_model(x):
    # Build the convolutional neural network
    # x: [None, 20, 80, 3]
    # 1. First convolution block
    with tf.variable_scope('conv1'):
        # Convolution layer: 32 filters plus biases
        conv1_weights = create_weights(shape=[5, 5, 3, 32])
        conv1_bias = create_weights(shape=[32])
        conv1_x = tf.nn.conv2d(input=x, filter=conv1_weights, strides=[1, 1, 1, 1], padding="SAME") + conv1_bias
        # Activation layer
        relu1_x = tf.nn.relu(conv1_x)
        # Pooling layer
        pool1_x = tf.nn.max_pool(value=relu1_x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # 2. Second convolution block
    with tf.variable_scope('conv2'):
        # Convolution layer: 64 filters plus biases
        conv2_weights = create_weights(shape=[5, 5, 32, 64])
        conv2_bias = create_weights(shape=[64])
        conv2_x = tf.nn.conv2d(input=pool1_x, filter=conv2_weights, strides=[1, 1, 1, 1], padding="SAME") + conv2_bias
        # Activation layer
        relu2_x = tf.nn.relu(conv2_x)
        # Pooling layer
        pool2_x = tf.nn.max_pool(value=relu2_x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # Fully connected layer
    with tf.variable_scope('full_connection'):
        # Flatten to a 2-D tensor
        # conv1 block: [None, 20, 80, 3] -> [None, 10, 40, 32]; conv2 block: -> [None, 5, 20, 64]
        # reshape: [None, 5, 20, 64] -> [None, 5 * 20 * 64]
        # matmul: [None, 5 * 20 * 64] x [5 * 20 * 64, 4 * 26] = [None, 4 * 26]
        x_fc = tf.reshape(pool2_x, shape=[-1, 5 * 20 * 64])
        weights_fc = create_weights(shape=[5 * 20 * 64, 4 * 26])
        bias_fc = create_weights(shape=[4 * 26])
        y_predict = tf.matmul(x_fc, weights_fc) + bias_fc
    return y_predict
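
The flattened size 5 * 20 * 64 follows from two rounds of 2x2 max pooling on the 20x80 input; a quick sketch of the arithmetic (not part of the training script):

# Shape trace for a [None, 20, 80, 3] input (illustration only).
h, w = 20, 80
h, w = h // 2, w // 2   # after the first conv block + 2x2 pooling: 10 x 40, depth 32
h, w = h // 2, w // 2   # after the second conv block + 2x2 pooling: 5 x 20, depth 64
flat = h * w * 64
print(flat)             # 6400 == 5 * 20 * 64, the fully connected input size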

if __name__ == '__main__':
    filename, image = read_pic()
    csv_data = parse_csv()

    # 1. Prepare the data placeholders
    x = tf.placeholder(tf.float32, shape=[None, 20, 80, 3])
    y_true = tf.placeholder(tf.float32, shape=[None, 4 * 26])
    # 2. Build the model
    y_predict = create_model(x)
    # 3. Build the loss function
    loss_list = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_predict)
    loss = tf.reduce_mean(loss_list)
    # 4. Optimize the loss
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)  # plain gradient descent
    optimizer = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)  # Adam optimizer

    # 5. Compute the accuracy: a sample counts as correct only if all 4 characters match
    equal_list = tf.reduce_all(tf.equal(tf.argmax(tf.reshape(y_predict, shape=[-1, 4, 26]), axis=2),
                                        tf.argmax(tf.reshape(y_true, shape=[-1, 4, 26]), axis=2)), axis=1)
    accuracy = tf.reduce_mean(tf.cast(equal_list, tf.float32))
    # Initialize the variables
    init = tf.global_variables_initializer()

    # Open a session
    with tf.Session() as sess:
        sess.run(init)
        # Start the queue-runner threads
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for i in range(150):
            filename_value, image_value = sess.run([filename, image])
            # Pair each sample's features with its target values
            labels = filename2label(filename_value, csv_data)
            # Convert the target values to one-hot encoding
            labels_value = tf.reshape(tf.one_hot(labels, depth=26), [-1, 4 * 26]).eval()
            _, error, accuracy_value = sess.run([optimizer, loss, accuracy],
                                                feed_dict={x: image_value, y_true: labels_value})
            print('step %d: training loss %f, accuracy %f' % (i + 1, error, accuracy_value))

        # Stop and join the threads
        coord.request_stop()
        coord.join(threads)
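
The one-hot step in the loop turns each 4-character label into a 104-dimensional row of y_true. A NumPy sketch of the same encoding for a single label (illustration only, equivalent to the tf.one_hot + reshape used above):

# NumPy sketch of the one-hot encoding of one label.
import numpy as np

label = [13, 25, 15, 15]              # e.g. 'NZPP'
one_hot = np.eye(26)[label]           # shape (4, 26), one row per character
y_true_row = one_hot.reshape(-1)      # shape (104,) == 4 * 26, matching the y_true placeholder
print(y_true_row.shape, one_hot.argmax(axis=1))   # (104,) [13 25 15 15]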

# LSTM time series prediction
from math import sqrt
from datetime import datetime
from numpy import concatenate
from pandas import read_csv, DataFrame, concat
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from tensorflow.keras.models import Sequential   # the standalone keras package works here as well
from tensorflow.keras import layers

# load data: the raw file stores year/month/day/hour in separate columns
def parse(x):
    return datetime.strptime(x, '%Y %m %d %H')
dataset = read_csv("D:/Download/PRSA_data_2010.1.1-2014.12.31.csv", parse_dates=[['year', 'month', 'day', 'hour']], index_col=0, date_parser=parse)
dataset.drop('No', axis=1, inplace=True)
# manually specify column names
dataset.columns = ['pollution', 'dew', 'temp', 'press', 'wnd_dir', 'wnd_spd', 'snow', 'rain']
dataset.index.name = 'date'
# mark all NA values with 0
dataset['pollution'].fillna(0, inplace=True)
# drop the first 24 hours
dataset = dataset[24:]
# summarize first 5 rows
print(dataset.head(5))
# save to file
# dataset.to_csv('pollution.csv')
# load dataset
# dataset = read_csv('pollution.csv', header=0, index_col=0)
values = dataset.values
# specify columns to plot
groups = [0, 1, 2, 3, 5, 6, 7]
i = 1
# plot each column
pyplot.figure()
for group in groups:
    pyplot.subplot(len(groups), 1, i)
    pyplot.plot(values[:, group])
    pyplot.title(dataset.columns[group], y=0.5, loc='right')
    i += 1
pyplot.show()
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
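
To make the (t-1) / (t) framing concrete, here is a tiny illustration of series_to_supervised on made-up data (two variables, five time steps; not part of the pollution pipeline):

# Toy illustration of series_to_supervised (made-up numbers).
import numpy as np

toy = np.arange(10).reshape(5, 2)            # 5 time steps, 2 variables
framed = series_to_supervised(toy, n_in=1, n_out=1)
print(framed)
#    var1(t-1)  var2(t-1)  var1(t)  var2(t)
# 1        0.0        1.0        2        3
# 2        2.0        3.0        4        5
# ...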
 
# load dataset
# dataset = read_csv('pollution.csv', header=0, index_col=0)
values = dataset.values
# integer encode the wind direction column
encoder = LabelEncoder()
values[:,4] = encoder.fit_transform(values[:,4])
# ensure all data is float
values = values.astype('float32')
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
# frame as supervised learning
reframed = series_to_supervised(scaled, 1, 1)
# drop the columns we don't want to predict (the current-hour features other than pollution)
reframed.drop(reframed.columns[[9,10,11,12,13,14,15]], axis=1, inplace=True)
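
For reference, with 8 variables the reframed table starts out with 16 columns, var1(t-1) through var8(t-1) followed by var1(t) through var8(t); the drop above keeps only var1(t), the pollution value at the current hour, as the prediction target. A quick check (sketch):

# Sanity check of the remaining columns (illustration only).
print(reframed.shape)                # (rows, 9): 8 lagged features plus the var1(t) target
print(list(reframed.columns)[-1])    # 'var1(t)'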
# print(reframed.head())
# split into train and test sets
values = reframed.values
n_train_hours = 365 * 24
train = values[:n_train_hours, :]
test = values[n_train_hours:, :]
# split into input and outputs
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
# design network
# model = Sequential()
# model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
# model.add(Dense(1))
# model.compile(loss='mae', optimizer='adam')
# # fit network
# history = model.fit(train_X, train_y, epochs=50, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)
# # plot history
# pyplot.plot(history.history['loss'], label='train')
# pyplot.plot(history.history['val_loss'], label='test')
# pyplot.legend()
# pyplot.show()
 
# design network
model = Sequential()
model.add(layers.LSTM(32, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(layers.LSTM(32, return_sequences=True))
model.add(layers.LSTM(32, return_sequences=True))
model.add(layers.LSTM(32)) 
model.add(layers.Dense(1))
model.summary() 
# model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
# model.add(Dense(1))
# model.summary()
model.compile(loss='mae', optimizer='adam')
# fit network
history = model.fit(train_X, train_y, epochs=50, batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)
# plot history
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()
# make a prediction
yhat = model.predict(test_X)
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
# invert scaling for the forecast: the scaler was fitted on all 8 columns,
# so the prediction is concatenated with the other features before inverse_transform
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]
# calculate RMSE
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)


 
