神经网络 Theano 学习笔记

基本用法

import numpy as np
import theano.tensor as T
from theano import function
from theano import pp

# Symbolic scalars; the "d" prefix selects dtype float64.
x = T.dscalar('x')
y = T.dscalar('y')
z = x + y
f = function([x, y], z)  # first the list of inputs, then the output
print(f(2, 3))

# Pretty-print the symbolic expression behind z.
print(pp(z))

# Symbolic matrices.
x = T.dmatrix('x')
y = T.dmatrix('y')

# Element-wise addition: both operands must have the same shape.
z = x + y
f = function([x, y], z)
print(f(np.arange(12).reshape((3, 4)),
        10 * np.ones((3, 4))))

# Matrix multiplication: a (3, 4) matrix needs a (4, k) right operand, so the
# second argument is (4, 3) here instead of the (3, 4) used for the addition.
z = T.dot(x, y)
f = function([x, y], z)
print(f(np.arange(12).reshape((3, 4)),
        10 * np.ones((4, 3))))

function用法

import numpy as np
import theano.tensor as T
import theano

# Example: a logistic (sigmoid) activation built from Theano's symbolic ops.
x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))
logistic = theano.function([x], s)
print(logistic([[0, 1], [-2, -3]]))

# A compiled function can return several outputs at once.
a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff ** 2
f = theano.function([a, b], [diff, abs_diff, diff_squared])
x1, x2, x3 = f(
    np.ones((2, 2)),
    np.arange(4).reshape((2, 2))
)
# Show the three results (difference, absolute difference, squared difference).
print(x1)
print(x2)
print(x3)

# Default values and named inputs via theano.In.
x, y, w = T.dscalars('x', 'y', 'w')
z = (x + y) * w
f = theano.function(
    [x,
     theano.In(y, value=1),                   # y defaults to 1
     theano.In(w, value=2, name='weights')],  # w defaults to 2, keyword "weights"
    z)
print(f(23))                  # uses both defaults
print(f(23, 2))               # overrides the default of y
print(f(23, 2, weights=4))    # overrides w through its keyword name

shared变量

import numpy as np
import theano
import theano.tensor as T

# Create a shared variable. Its dtype is fixed here, and every value combined
# with it below is declared with the same dtype.
state = theano.shared(np.array(0, dtype=np.float64), 'state')
inc = T.scalar('inc', dtype=state.dtype)
# The function outputs `state` and, through `updates`, assigns state + inc to it.
accumulator = theano.function([inc], state, updates=[(state, state + inc)])

# Reading the value.
print(state.get_value())  # current value of state
accumulator(1)
print(state.get_value())
accumulator(10)
print(state.get_value())

# Overwriting the value.
state.set_value(-1)
accumulator(3)
print(state.get_value())

# Temporarily substitute another value for the shared variable with `givens`;
# the value read back after the call is printed to show the effect on state.
tmp_func = state * 2 + inc
a = T.scalar(dtype=state.dtype)
skip_shared = theano.function([inc, a], tmp_func, givens=[(state, a)])
print(skip_shared(2, 3))
print(state.get_value())

定义Layer类

import theano
import theano.tensor as T
import numpy as np


class Layer(object):
    """A fully connected layer: outputs = activation(inputs . W + b).

    Args:
        inputs: symbolic input the layer is applied to.
        in_size: first dimension of the weight matrix W.
        out_size: second dimension of W and length of the bias b.
        activation_function: callable applied to ``Wx_plus_b``; when None the
            layer is linear and ``outputs`` is ``Wx_plus_b`` itself.
    """

    def __init__(self, inputs, in_size, out_size, activation_function=None):
        # W has shape (in_size, out_size), drawn from a normal(0, 1) distribution.
        self.W = theano.shared(np.random.normal(0, 1, (in_size, out_size)))
        # b has shape (out_size,), every entry initialised to 0.1.
        self.b = theano.shared(np.zeros((out_size,)) + 0.1)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        if activation_function is None:
            self.outputs = self.Wx_plus_b
        else:
            self.outputs = self.activation_function(self.Wx_plus_b)


# Usage: stack two layers, feeding the first layer's outputs into the second.
inputs = T.dmatrix('inputs')
I1 = Layer(inputs, in_size=1, out_size=10, activation_function=T.nnet.relu)
I2 = Layer(I1.outputs, 10, 1, None)

regression 回归例子


from __future__ import print_function
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt


class Layer(object):
    """Dense layer computing ``activation_function(inputs . W + b)``.

    ``W`` is a shared (in_size, out_size) matrix sampled from normal(0, 1) and
    ``b`` a shared vector of length out_size filled with 0.1. Without an
    activation function the layer output is the affine term ``Wx_plus_b``.
    """

    def __init__(self, inputs, in_size, out_size, activation_function=None):
        weight_shape = (in_size, out_size)
        initial_weights = np.random.normal(0, 1, weight_shape)
        initial_bias = np.zeros((out_size, )) + 0.1

        self.W = theano.shared(initial_weights)
        self.b = theano.shared(initial_bias)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        # Linear layer when no activation is given, otherwise apply it.
        self.outputs = (
            self.Wx_plus_b
            if activation_function is None
            else self.activation_function(self.Wx_plus_b)
        )


# Build a noisy toy dataset following y = x^2 - 0.5.
x_data = np.linspace(-1, 1, 300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise

# Show the raw data.
plt.scatter(x_data, y_data)
plt.show()

# Symbolic placeholders for the inputs (x) and the targets (y).
x = T.dmatrix("x")
y = T.dmatrix("y")

# Network: 1 -> 10 with ReLU, then 10 -> 1 without activation.
l1 = Layer(x, 1, 10, T.nnet.relu)
l2 = Layer(l1.outputs, 10, 1, None)

# Mean squared error between the network output and the targets.
residual = l2.outputs - y
cost = T.mean(T.square(residual))

# Gradients of the cost with respect to every trainable parameter.
params = [l1.W, l1.b, l2.W, l2.b]
gW1, gb1, gW2, gb2 = T.grad(cost, params)

# One gradient-descent step per call: param <- param - learning_rate * grad.
learning_rate = 0.05
sgd_updates = [
    (param, param - learning_rate * grad)
    for param, grad in zip(params, (gW1, gb1, gW2, gb2))
]
train = theano.function(inputs=[x, y], outputs=cost, updates=sgd_updates)

# Prediction function: forward pass only.
predict = theano.function(inputs=[x], outputs=l2.outputs)

for i in range(1000):
    err = train(x_data, y_data)
    if i % 50 != 0:
        continue
    # Report the training cost every 50 steps.
    print(err)

regression 结果可视化

# Continues from the regression example: `plt`, `x_data`, `y_data`, `train`
# and `predict` must already be defined.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x_data, y_data)
plt.ion()  # interactive mode, so the figure refreshes while training runs
plt.show()

# Line objects of the prediction curve currently drawn; None before the first
# plot so the first pass has nothing to erase.
lines = None
for i in range(1000):
    # training
    err = train(x_data, y_data)
    if i % 50 == 0:
        # Erase the previous curve explicitly. Removing the artist itself
        # replaces `ax.lines.remove(...)` wrapped in a catch-all `except`,
        # which hid the undefined `lines` on the first pass and any failure
        # to remove the old curve afterwards.
        if lines is not None:
            lines[0].remove()
        prediction_value = predict(x_data)
        # Draw the current prediction.
        lines = ax.plot(x_data, prediction_value, 'r-', lw=5)
        plt.pause(.5)

classification 分类学习

from __future__ import print_function
import numpy as np
import theano
import theano.tensor as T

def compute_accuracy(y_target, y_predict):
    """Return the fraction of predictions that equal their targets.

    Args:
        y_target: 1-D sequence of target labels.
        y_predict: 1-D sequence of predicted labels, compared element-wise
            with ``y_target``.

    Returns:
        The share of matching positions as a float in [0, 1].
    """
    correct_prediction = np.equal(y_predict, y_target)
    # np.mean always performs true division; dividing an integer sum by
    # len(...) truncates to 0 on Python 2 without `from __future__ import division`.
    return np.mean(correct_prediction)

rng = np.random

N = 400      # number of training samples
feats = 784  # number of input features

# Random dataset D = (input_values, target_class); classes are 0 or 1.
input_values = rng.randn(N, feats)
target_class = rng.randint(size=N, low=0, high=2)
D = (input_values, target_class)

# Symbolic placeholders for the inputs and the targets.
x = T.dmatrix("x")
y = T.dvector("y")

# Trainable weights and bias.
W = theano.shared(rng.randn(feats), name="w")
b = theano.shared(0., name="b")

# Model output, thresholded prediction and loss.
p_1 = T.nnet.sigmoid(T.dot(x, W) + b)           # sigmoid activation
prediction = p_1 > 0.5                          # true where p_1 exceeds 0.5
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1)   # per-sample cross-entropy
cost = xent.mean() + 0.01 * (W ** 2).sum()      # batch cost plus squared-weight penalty
gW, gb = T.grad(cost, [W, b])                   # gradients for weights and bias

# Compile the training step and the prediction function.
learning_rate = 0.1
sgd_updates = (
    (W, W - learning_rate * gW),
    (b, b - learning_rate * gb),
)
train = theano.function(
    inputs=[x, y],
    outputs=[prediction, xent.mean()],
    updates=sgd_updates)
predict = theano.function(inputs=[x], outputs=prediction)

# Training loop; report cost and accuracy every 50 steps.
for i in range(500):
    pred, err = train(D[0], D[1])
    if i % 50 != 0:
        continue
    print('cost:', err)
    print("accuracy:", compute_accuracy(D[1], predict(D[0])))

print("target values for D:")
print(D[1])
print("prediction on D:")
print(predict(D[0]))

过拟合

过拟合是指机器学习模型为了追求更小的训练误差而变得“自负”:它过度拟合训练数据(包括其中的噪声),导致在新数据上的泛化能力下降。

解决方法
一:增加数据量
二:L1,L2…regularization
三:Dropout regularization(适用于神经网络)

regularization正规化

from __future__ import print_function
import theano
from sklearn.datasets import load_boston
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt


class Layer(object):
    """Dense layer computing ``activation_function(inputs . W + b)``.

    ``W`` is a shared (in_size, out_size) matrix sampled from normal(0, 1) and
    ``b`` a shared vector of length out_size filled with 0.1. Without an
    activation function the layer output is the affine term ``Wx_plus_b``.
    """

    def __init__(self, inputs, in_size, out_size, activation_function=None):
        weight_shape = (in_size, out_size)
        initial_weights = np.random.normal(0, 1, weight_shape)
        initial_bias = np.zeros((out_size, )) + 0.1

        self.W = theano.shared(initial_weights)
        self.b = theano.shared(initial_bias)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        # Linear layer when no activation is given, otherwise apply it.
        self.outputs = (
            self.Wx_plus_b
            if activation_function is None
            else self.activation_function(self.Wx_plus_b)
        )


def minmax_normalization(data):
    """Rescale *data* linearly into [0, 1] along axis 0.

    Each column is mapped with ``(value - column_min) / (column_max - column_min)``.
    A constant column (max == min) is mapped to 0 instead of producing NaN
    from a 0/0 division.

    Args:
        data: array-like; minimum and maximum are taken along axis 0.

    Returns:
        numpy array of the same shape as ``data`` holding the rescaled values.
    """
    data = np.asarray(data)
    xs_min = np.min(data, axis=0)
    xs_range = np.max(data, axis=0) - xs_min
    # The numerator is 0 wherever the range is 0, so dividing by 1 there
    # yields 0 and leaves every other column untouched.
    safe_range = np.where(xs_range == 0, 1, xs_range)
    return (data - xs_min) / safe_range

np.random.seed(100)

# Load the dataset once; squeeze the inputs into [0, 1] and turn the targets
# into a column.
boston = load_boston()
x_data = minmax_normalization(boston.data)
y_data = boston.target[:, np.newaxis]

# First 400 samples for training, the remainder for testing.
n_train = 400
x_train, x_test = x_data[:n_train], x_data[n_train:]
y_train, y_test = y_data[:n_train], y_data[n_train:]

x = T.dmatrix("x")
y = T.dmatrix("y")

# Network: 13 -> 50 with tanh, then 50 -> 1 without activation.
l1 = Layer(x, 13, 50, T.tanh)
l2 = Layer(l1.outputs, 50, 1, None)

# Alternative ways to define the cost.
cost = T.mean(T.square(l2.outputs - y))      # no regularization
# cost = T.mean(T.square(l2.outputs - y)) + 0.1 * ((l1.W ** 2).sum() + (l2.W ** 2).sum())  # L2 regularization
# cost = T.mean(T.square(l2.outputs - y)) + 0.1 * (abs(l1.W).sum() + abs(l2.W).sum())  # L1 regularization
params = [l1.W, l1.b, l2.W, l2.b]
gW1, gb1, gW2, gb2 = T.grad(cost, params)

# Training step: param <- param - learning_rate * grad; it returns nothing.
learning_rate = 0.01
sgd_updates = [
    (param, param - learning_rate * grad)
    for param, grad in zip(params, (gW1, gb1, gW2, gb2))
]
train = theano.function(inputs=[x, y], updates=sgd_updates)

compute_cost = theano.function(inputs=[x, y], outputs=cost)

# Track how the training and test errors evolve.
train_err_list = []
test_err_list = []
learning_time = []
for i in range(1000):
    train(x_train, y_train)
    if i % 10 != 0:
        continue
    # Record both errors every 10 steps.
    train_err_list.append(compute_cost(x_train, y_train))
    test_err_list.append(compute_cost(x_test, y_test))
    learning_time.append(i)

# Plot: training error as a solid red line, test error as a dashed blue line.
plt.plot(learning_time, train_err_list, 'r-')
plt.plot(learning_time, test_err_list, 'b--')
plt.show()

Save and reload 神经网络

import pickle

# NOTE(review): the two paths below differ and look like placeholders
# ('地址' means "address"); point both at the same real file before use.
# `w` and `b` are expected to be existing shared variables — confirm against
# the model being saved.

# Save: dump the current parameter values to disk.
with open('地址', 'wb') as f:
    model = [w.get_value(), b.get_value()]
    pickle.dump(model, f)

# Restore: read the values back and write them into the shared variables.
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
with open('..', 'rb') as f:
    model = pickle.load(f)
    w.set_value(model[0])
    b.set_value(model[1])

©️2020 CSDN 皮肤主题: 深蓝海洋 设计师:CSDN官方博客 返回首页