使用 TensorFlow 训练汽车状态分类模型

(1)下载和处理数据

import pandas as pd
from urllib.request import urlretrieve

def load_data(download=True):
    """Read ``car.csv`` from the working directory into a DataFrame.

    Args:
        download: When true, the UCI car data file is first fetched and
            saved as ``car.csv`` before it is read.

    Returns:
        A DataFrame whose columns are named 'buying', 'maint', 'doors',
        'persons', 'lug_boot', 'safety' and 'class'.
    """
    if download:
        urlretrieve(
            "http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data",
            "car.csv",
        )
        print('Download to car.csv')

    # The column names are supplied here and passed to pandas explicitly.
    return pd.read_csv(
        'car.csv',
        names=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class'],
    )


def convert2onehot(data):
    """Return a one-hot encoded version of *data*.

    Every column is expanded into indicator columns, using the original
    column names as the prefixes of the new column names.
    """
    column_prefixes = data.columns
    encoded = pd.get_dummies(data, prefix=column_prefixes)
    return encoded

if __name__ == '__main__':
    # Fetch the raw table, then show a preview and the number of rows.
    data = load_data(download=True)
    print(data.head())
    print('\nNum of data: ', len(data), '\n')

    # List the distinct values that occur in each column.
    for column in data:
        print(column, pd.unique(data[column]))

    # One-hot encode the table, preview it and save it for later use.
    new_data = convert2onehot(data)
    print('\n', new_data.head(2))
    new_data.to_csv('car_onehot.csv', index=False)

得到数据:

Download to car.csv
  buying  maint doors persons lug_boot safety  class
0  vhigh  vhigh     2       2    small    low  unacc
1  vhigh  vhigh     2       2    small    med  unacc
2  vhigh  vhigh     2       2    small   high  unacc
3  vhigh  vhigh     2       2      med    low  unacc
4  vhigh  vhigh     2       2      med    med  unacc

Num of data:  1728 

buying ['vhigh' 'high' 'med' 'low']
maint ['vhigh' 'high' 'med' 'low']
doors ['2' '3' '4' '5more']
persons ['2' '4' 'more']
lug_boot ['small' 'med' 'big']
safety ['low' 'med' 'high']
class ['unacc' 'acc' 'vgood' 'good']

    buying_high  buying_low     ...       class_unacc  class_vgood
0            0           0     ...                 1            0
1            0           0     ...                 1            0

[2 rows x 25 columns]

(2)模型训练

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import data_processing

data = data_processing.load_data(download=False)
new_data = data_processing.convert2onehot(data)

# Prepare the data: shuffle the rows, then split 70% train / 30% test.
new_data = new_data.values.astype(np.float32)
np.random.shuffle(new_data)
sep = int(0.7 * len(new_data))
train_data = new_data[:sep]
test_data = new_data[sep:]

# 'class' is the last original column, so its one-hot columns come last and
# every row is laid out as [features | labels].
N_CLASSES = 4
N_FEATURES = new_data.shape[1] - N_CLASSES  # 21 for the car data set

# Build the network. A single placeholder carries features and labels
# together; it is sliced into the two parts below.
tf_input = tf.placeholder(tf.float32, [None, N_FEATURES + N_CLASSES], 'input')
tfx = tf_input[:, :N_FEATURES]
tfy = tf_input[:, N_FEATURES:]

# Two hidden layers with ReLU activation and a linear output layer (logits).
l1 = tf.layers.dense(tfx, 128, tf.nn.relu, name='l1')
l2 = tf.layers.dense(l1, 128, tf.nn.relu, name='l2')
out = tf.layers.dense(l2, N_CLASSES, name='l3')
prediction = tf.nn.softmax(out, name='pred')

loss = tf.losses.softmax_cross_entropy(onehot_labels=tfy, logits=out)
# Accuracy of exactly the rows fed in this run. tf.metrics.accuracy is not
# used here: its update op accumulates counts across sess.run calls, so it
# would report a running average over all evaluations so far instead of the
# accuracy of the current model.
accuracy = tf.reduce_mean(
    tf.cast(
        tf.equal(tf.argmax(tfy, axis=1), tf.argmax(out, axis=1)),
        tf.float32,
    )
)
opt = tf.train.GradientDescentOptimizer(0.1)
train_op = opt.minimize(loss)

sess = tf.Session()
sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))

for t in range(4000):
    # One training step on a random mini-batch of 32 rows
    # (indices are drawn with replacement).
    batch_index = np.random.randint(len(train_data), size=32)
    sess.run(train_op, {tf_input: train_data[batch_index]})

    # Every 50 steps, evaluate on the held-out test set.
    if t % 50 == 0:
        acc_, loss_ = sess.run(
            [accuracy, loss],
            {tf_input: test_data}
        )
        print("Step: %i" % t, "| Accurate: %.2f" % acc_, "| Loss: %.2f" % loss_)

最后结果

Step: 2500 | Accurate: 0.95 | Loss: 0.05
Step: 2550 | Accurate: 0.95 | Loss: 0.06
Step: 2600 | Accurate: 0.95 | Loss: 0.05
Step: 2650 | Accurate: 0.96 | Loss: 0.05
Step: 2700 | Accurate: 0.96 | Loss: 0.05
Step: 2750 | Accurate: 0.96 | Loss: 0.04
Step: 2800 | Accurate: 0.96 | Loss: 0.04
Step: 2850 | Accurate: 0.96 | Loss: 0.04
Step: 2900 | Accurate: 0.96 | Loss: 0.04
Step: 2950 | Accurate: 0.96 | Loss: 0.05
Step: 3000 | Accurate: 0.96 | Loss: 0.05
Step: 3050 | Accurate: 0.96 | Loss: 0.05
Step: 3100 | Accurate: 0.96 | Loss: 0.04
Step: 3150 | Accurate: 0.96 | Loss: 0.04
Step: 3200 | Accurate: 0.96 | Loss: 0.04
Step: 3250 | Accurate: 0.96 | Loss: 0.04
Step: 3300 | Accurate: 0.96 | Loss: 0.04
Step: 3350 | Accurate: 0.96 | Loss: 0.04
Step: 3400 | Accurate: 0.96 | Loss: 0.04
Step: 3450 | Accurate: 0.96 | Loss: 0.04
Step: 3500 | Accurate: 0.96 | Loss: 0.04
Step: 3550 | Accurate: 0.96 | Loss: 0.04
Step: 3600 | Accurate: 0.96 | Loss: 0.04
Step: 3650 | Accurate: 0.96 | Loss: 0.04
Step: 3700 | Accurate: 0.96 | Loss: 0.04
Step: 3750 | Accurate: 0.96 | Loss: 0.04
Step: 3800 | Accurate: 0.96 | Loss: 0.04
Step: 3850 | Accurate: 0.96 | Loss: 0.04
Step: 3900 | Accurate: 0.96 | Loss: 0.04
Step: 3950 | Accurate: 0.97 | Loss: 0.04

添加可视化部分

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import data_processing

data = data_processing.load_data(download=False)
new_data = data_processing.convert2onehot(data)

# Prepare the data: shuffle the rows, then split 70% train / 30% test.
new_data = new_data.values.astype(np.float32)
np.random.shuffle(new_data)
sep = int(0.7 * len(new_data))
train_data = new_data[:sep]
test_data = new_data[sep:]

# 'class' is the last original column, so its one-hot columns come last and
# every row is laid out as [features | labels].
N_CLASSES = 4
N_FEATURES = new_data.shape[1] - N_CLASSES  # 21 for the car data set

# Build the network. A single placeholder carries features and labels
# together; it is sliced into the two parts below.
tf_input = tf.placeholder(tf.float32, [None, N_FEATURES + N_CLASSES], 'input')
tfx = tf_input[:, :N_FEATURES]
tfy = tf_input[:, N_FEATURES:]

# Two hidden layers with ReLU activation and a linear output layer (logits).
l1 = tf.layers.dense(tfx, 128, tf.nn.relu, name='l1')
l2 = tf.layers.dense(l1, 128, tf.nn.relu, name='l2')
out = tf.layers.dense(l2, N_CLASSES, name='l3')
prediction = tf.nn.softmax(out, name='pred')

loss = tf.losses.softmax_cross_entropy(onehot_labels=tfy, logits=out)
# Accuracy of exactly the rows fed in this run. tf.metrics.accuracy is not
# used here: its update op accumulates counts across sess.run calls, so the
# plotted curve would be a running average over all evaluations so far
# instead of the accuracy of the current model.
accuracy = tf.reduce_mean(
    tf.cast(
        tf.equal(tf.argmax(tfy, axis=1), tf.argmax(out, axis=1)),
        tf.float32,
    )
)
opt = tf.train.GradientDescentOptimizer(0.1)
train_op = opt.minimize(loss)

sess = tf.Session()
sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))

# Live visualization: class histogram on the left, accuracy curve on the right.
plt.ion()
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
accuracies, steps = [], []

# Class index of every test row. The test set never changes, so this is
# computed once instead of on every redraw.
target_classes = np.argmax(test_data[:, N_FEATURES:], axis=1)

for t in range(4000):
    # One training step on a random mini-batch of 32 rows
    # (indices are drawn with replacement).
    batch_index = np.random.randint(len(train_data), size=32)
    sess.run(train_op, {tf_input: train_data[batch_index]})

    # Every 50 steps, evaluate on the held-out test set and redraw.
    if t % 50 == 0:
        acc_, pred_, loss_ = sess.run(
            [accuracy, prediction, loss],
            {tf_input: test_data}
        )
        accuracies.append(acc_)
        steps.append(t)
        print("Step: %i" % t, "| Accurate: %.2f" % acc_, "| Loss: %.2f" % loss_)

        # Left panel: predicted (red) vs. true (blue) number of rows per class.
        predicted_classes = np.argmax(pred_, axis=1)
        ax1.cla()
        for c in range(N_CLASSES):
            bp = ax1.bar(c + 0.1, height=np.sum(predicted_classes == c), width=0.2, color='red')
            bt = ax1.bar(c - 0.1, height=np.sum(target_classes == c), width=0.2, color='blue')
        # Tick positions and labels are set separately: passing the labels
        # as the second positional argument of set_xticks is interpreted as
        # the `minor` flag by older Matplotlib releases.
        # The labels follow the order of the one-hot class columns
        # (acc, good, unacc, vgood).
        ax1.set_xticks(range(N_CLASSES))
        ax1.set_xticklabels(['accepted', 'good', 'unaccepted', 'very good'])
        ax1.legend(handles=[bp, bt], labels=['prediction', 'target'])
        ax1.set_ylim((0, 400))

        # Right panel: test accuracy over the training steps.
        ax2.cla()
        ax2.plot(steps, accuracies, label='accuracy')
        # `top` replaces the `ymax` keyword, which current Matplotlib rejects.
        ax2.set_ylim(top=1)
        ax2.set_ylabel('accuracy')
        plt.pause(0.01)

plt.ioff()
plt.show()

image-20180711174142672

  • 1
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值