一.BP神经网络:曾经最著名、最经典的非线性学习算法
(1)BP神经网络基本结构
(2)传递激活函数
(3)训练过程
a.正向传播过程
b.计算期望与实际分类的误差
c.计算反向传播过程
d.修正各层的权值
(4)代码实现
# -*- coding: UTF-8 -*-
from numpy import *
import operator
import Untils
import matplotlib.pyplot as plt
# 传递函数:
def logistic(inX):
return 1.0/(1.0+exp(-inX))
# 传递函数的导函数
def dlogit(inX1,inX2):
return multiply(inX2,(1.0-inX2))
# 矩阵各元素平方之和
def errorfunc(inX):
return sum(power(inX,2))/2.0
# 加载student.txt数据集
def loadDataSet(filename):
dataMat = []; labelMat = []
fr = open(filename) #testSet.txt
for line in fr.readlines():
lineArr = line.strip().split()
dataMat.append([float(lineArr[0]), float(lineArr[1]), 1.0])
labelMat.append(int(lineArr[2]))
return dataMat,labelMat
# 数据标准化(归一化):student.txt数据集
def normalize(dataMat):
# 标准化
dataMat[:,0] = (dataMat[:,0]-mean(dataMat[:,0]))/std(dataMat[:,0])
dataMat[:,1] = (dataMat[:,1]-mean(dataMat[:,1]))/std(dataMat[:,1])
return dataMat
def bpNet(dataSet,classLabels):
# 数据集矩阵化
SampIn = mat(dataSet).T
expected = mat(classLabels)
m,n = shape(dataSet)
# 网络参数
eb = 0.01 # 误差容限
eta = 0.05 # 学习率
mc = 0.3 # 动量因子
maxiter = 2000 # 最大迭代次数
errlist = [] # 误差列表
# 构造网络
# 初始化网络
nSampNum = m; # 样本数量
nSampDim = n-1; # 样本维度
nHidden = 4; # 隐含层神经元
nOut = 1; # 输出层
# 隐含层参数
hi_w = 2.0*(random.rand(nHidden,nSampDim)-0.5)
hi_b = 2.0*(random.rand(nHidden,1)-0.5)
hi_wb = mat(Untils.mergMatrix(mat(hi_w),mat(hi_b)))
# 输出层参数
out_w = 2.0*(random.rand(nOut,nHidden)-0.5)
out_b = 2.0*(random.rand(nOut,1)-0.5)
out_wb = mat(Untils.mergMatrix(mat(out_w),mat(out_b)))
# 默认旧权值
dout_wbOld = 0.0 ; dhi_wbOld = 0.0
for i in xrange(maxiter):
#1. 工作信号正向传播
#1.1 输入层到隐含层
hi_input = hi_wb*SampIn
hi_output = logistic(hi_input)
hi2out = Untils.mergMatrix(hi_output.T, ones((nSampNum,1))).T
#1.2 隐含层到输出层
out_input = out_wb*hi2out
out_output = logistic(out_input)
#2. 误差计算
err = expected - out_output
sse = errorfunc(err)
errlist.append(sse);
#2.1 判断是否收敛
if sse <= eb:
print "iteration:",i+ 1
break;
#3.误差信号反向传播
#3.1 DELTA为输出层到隐含层梯度
DELTA = multiply(err,dlogit(out_input,out_output))
wDelta = out_wb[:,:-1].T*DELTA
#3.2 delta为隐含层到输入层梯度
delta = multiply(wDelta,dlogit(hi_input,hi_output))
dout_wb = DELTA*hi2out.T
#3.3 输入层的权值更新
dhi_wb = delta*SampIn.T
#3.4 更新输出层和隐含层权值
if i == 0:
out_wb = out_wb + eta * dout_wb
hi_wb = hi_wb + eta * dhi_wb
else :
out_wb = out_wb + (1.0 - mc)*eta*dout_wb + mc * dout_wbOld
hi_wb = hi_wb + (1.0 - mc)*eta*dhi_wb + mc * dhi_wbOld
dout_wbOld = dout_wb
dhi_wbOld = dhi_wb
return errlist,out_wb,hi_wb
def BPClassfier(start,end,WEX,wex):
x = linspace(start,end,30)
xx = mat(ones((30,30)))
xx[:,0:30] = x
yy = xx.T
z = ones((len(xx),len(yy))) ;
for i in range(len(xx)):
for j in range(len(yy)):
xi = []; tauex=[] ; tautemp=[]
mat(xi.append([xx[i,j],yy[i,j],1]))
hi_input = wex*(mat(xi).T)
hi_out = logistic(hi_input)
taumrow,taucol= shape(hi_out)
tauex = mat(ones((1,taumrow+1)))
tauex[:,0:taumrow] = (hi_out.T)[:,0:taumrow]
HM = WEX*(mat(tauex).T)
out = logistic(HM)
z[i,j] = out
return x,z
from numpy import *
import operator
from bpNet import *
import matplotlib.pyplot as plt
# 数据集
bpnet = BPNet()
bpnet.loadDataSet("testSet2.txt")
bpnet.dataMat = bpnet.normalize(bpnet.dataMat)
# 绘制数据集散点图
bpnet.drawClassScatter(plt)
# BP神经网络进行数据分类
bpnet.bpTrain()
print bpnet.out_wb
print bpnet.hi_wb
# 计算和绘制分类线
x,z = bpnet.BPClassfier(-3.0,3.0)
bpnet.classfyLine(plt,x,z)
plt.show()
# 绘制误差曲线
bpnet.TrendLine(plt)
plt.show()
测试输出结果如下:
二.自组织映射神经网络(SOM):一种无监督的聚类算法,适用于簇结构比较明显的数据;网络只有两层,即输入层和输出层。对于结构松散的数据,则更适合运用KMeans。
(1)结构图
(2)代码实现:
import sys
import random
import math
class SOM:
def __init__(self, inputDim, mapWidth, mapHeight, inputPatternSet):
self.__eta0 = 0.1
self.__sigma0 = 2.0
self.__tau1 = 250
self.__tau2 = 500
self.__patternLength = inputDim
self.__mapWidth = mapWidth
self.__mapHeight = mapHeight
self.__w = [[[random.random() * 0.1 for i in range(self.__patternLength)]
for i in range(self.__mapWidth)]
for i in range(self.__mapHeight)]
def coordinates(self, pattern):
winI = 0
winJ = 0
min = sys.float_info.max
for i in range(self.__mapHeight):
for j in range(self.__mapWidth):
s = 0.0
for k in range(3):
s = s + abs(pattern.patternItem[k] - self.__w[i][j][k])
if min > s:
min = s
winI = i
winJ = j
return (winI, winJ)
def clustering(self, patterns):
# for pattern in patterns:
# if not isinstance(pattern, IPattern):
# raise TypeError("Pattern must conform to IPattern")
#debug
self.__clu = [[] for i in range(len(patterns))]
self.__patterns = patterns
self.__clusteringProcess()
#self.__writeClu()
#debug
def __writeClu(self):
import csv
w = csv.writer(file(r'/Users/alexander/temp/data1.csv','wb'))
w.writerows(self.__clu)
# for i in range(len(self.__clu)):
# for j in range(len(self.__clu[i])):
# print i, j, self.__clu[i][j]
def __clusteringProcess(self):
error = 0.0
e = sys.float_info.max
de = e - error
end = False
i = 0
while not end:
error = self.__clusteringEpoch(self.__patterns, i)
de = abs(e - error)
if de < 0.0001 or i > 10000:
end = True
print "Iteration: ", i
print "Error: ", e
print "Error delta: ", de
e = error
i = i + 1
def __clusteringEpoch(self, patterns, n):
error = 0.0
randOrder = range(len(patterns))
for i in range(len(patterns)):
placeToSwap = random.randint(1, len(patterns) - 1)
temp = randOrder[i]
randOrder[i] = randOrder[placeToSwap]
randOrder[placeToSwap] = temp
for patIndex in range(len(patterns)):
currentPatternIndex = randOrder[patIndex]
pattern = self.__patterns[currentPatternIndex]
winI = 0
winJ = 0
min = sys.float_info.max
for i in range(self.__mapHeight):
for j in range(self.__mapWidth):
s = 0.0
for k in range(3):
s = s + pow(pattern.patternItem[k] - self.__w[i][j][k], 2)
s = math.sqrt(s)
if min > s:
min = s
winI = i
winJ = j
#self.__clu[currentPatternIndex].append((winI, winJ))
self.__clu[currentPatternIndex].append(winI * self.__mapWidth + winJ)
#print "(winI, winJ): ", (winI, winJ)
self.__printW()
e = 0.0
eta = self.__eta(n)
for i in range(self.__mapHeight):
for j in range(self.__mapWidth):
nbh = self.__neighbourhood(i, j, winI, winJ, n)
for k in range(3):
dif = self.__patterns[currentPatternIndex].patternItem[k] - self.__w[i][j][k]
e = nbh * dif
#print "w[", i, "][", j, "][", k, "]: ", self.__w[i][j][k]
self.__w[i][j][k] += eta * e;
#print "w[", i, "][", j, "][", k, "]: ", self.__w[i][j][k]
error += abs(e)
self.__printW()
return error
#for debug
def __printW(self):
return
for i in range(self.__mapHeight):
for j in range(self.__mapWidth):
print i * self.__mapWidth + j, " ", self.__w[i][j]
def __eta(self, n):
n = n * 1.0
eta = self.__eta0 * math.exp(-n / self.__tau2)
#print "Eta(", n, "): ", eta
return eta
def __neighbourhood(self, i, j, centerI, centerJ, n):
di = i - centerI
dj = j - centerJ
distance2 = di * di + dj * dj
nbh = math.exp(- distance2 / (2.0 * math.pow(self.__sigma0 * math.exp(-n/self.__tau1), 2)))
#print "Nbh(", "i: ", i, "j: ", j, "n: ", n, "): ", nbh
return nbh
import numpy as np
from Kohonen import *
from numpy import *
import matplotlib.pyplot as plt
# 矩阵各元素平方之和
def errorfunc(inX):
return sum(power(inX,2))*0.5
# 加载坐标数据文件
SOMNet = Kohonen()
SOMNet.loadDataSet("dataset2.txt");
SOMNet.train()
print SOMNet.w
SOMNet.showCluster(plt)
SOMNet.TrendLine(plt,SOMNet.lratelist)
SOMNet.TrendLine(plt,SOMNet.rlist)
运行结果:
三.模拟退火算法(Boltzmann机):无监督网络学习.
(1)简单的代码实现:
import operator
import Untils
from numpy import *
import copy
import matplotlib.pyplot as plt
# 计算矩阵各向量之间的距离:返回一个对称的n*n矩阵
def distM(matA,matB):
ma,na = shape(matA);
mb,nb = shape(matB);
rtnmat= zeros((ma,nb))
for i in xrange(ma):
for j in xrange(nb):
rtnmat[i,j] = sqrt(sum(power(matA[i,:] - matB[:,j].T,2)))
return rtnmat
def pathLen(dist,path):
# dist:N*N邻接矩阵
# 长度为N的向量,包含从1-N的整数
N = len(path)
plen = 0;
for i in xrange(0,N-1):
plen += dist[path[i], path[i+1]]
plen += dist[path[0], path[N-1]]
return plen
def changePath(old_path):
# 在oldpath附近产生新的路径
if type(old_path) is not list :
old_path = old_path.tolist()
N = len(old_path)
if random.rand() < 0.25: # 产生两个位置,并交换
chpos = floor(random.rand(1,2)*N) # random.rand(1,2)
chpos = chpos.tolist()[0]
new_path = copy.deepcopy(old_path)
new_path[int(chpos[0])] = old_path[int(chpos[1])]
new_path[int(chpos[1])] = old_path[int(chpos[0])]
else: # 产生三个位置,交换a-b和b-c段
d = ceil(random.rand(1,3)*N);
d = d.tolist()[0]
d.sort()
a = int(d[0]); b = int(d[1]); c = int(d[2])
if a != b and b != c:
new_path = copy.deepcopy(old_path)
new_path[a:c-1] = old_path[b-1:c-1] + old_path[a:b-1]
else:
new_path = changePath(old_path)
return new_path
def boltzmann(cityPosition,MAX_ITER = 2000,T0 = 1000,Lambda = 0.97):
m,n = shape(cityPosition)
pn = m
# 将城市的坐标矩阵转换为邻接矩阵(城市间距离矩阵)
dist = distM(cityPosition,cityPosition.T)
# 初始化
MAX_M = m;
# 构造一个初始可行解
x0 = arange(m)
random.shuffle(x0)
#
T = T0;
iteration = 0;
x = x0; # 路径变量
xx = x0.tolist(); # 每个路径
di = []
di.append(pathLen(dist, x0)) # 每个路径对应的距离
k = 0; # 路径计数
# 外循环
while iteration <= MAX_ITER:
# 内循环迭代器
m = 0;
# 内循环
while m <= MAX_M:
# 产生新路径
newx = changePath(x)
# 计算距离
oldl = pathLen(dist,x)
newl = pathLen(dist,newx)
if ( oldl > newl): # 如果新路径优于原路径,选择新路径作为下一状态
x = newx
xx.append(x) # xx[n,:] = x
di.append(newl) # di[n] = newl
k += 1
else: # 如果新路径比原路径差,则执行概率操作
tmp = random.rand()
sigmod = exp(-(newl - oldl)/T)
if tmp < sigmod:
x = newx
xx.append(x) # xx[n,:] = x
di.append(newl) # di[n]= newl
k += 1
m += 1 # 内循环次数加1
# 内循环
iteration += 1 # 外循环次数加1
T = T*Lambda # 降温
# 计算最优值
bestd = min(di)
indx = argmin(di)
bestx = xx[indx]
print "循环迭代",k,"次"
print "最优解:",bestd
print "最佳路线:",bestx
return bestx,di
import operator
import copy
import Untils
import Boltzmann
from numpy import *
import matplotlib.pyplot as plt
dataSet = Untils.loadDataSet("dataSet25.txt")
cityPosition = mat(dataSet)
m,n = shape(cityPosition)
bestx,di = Boltzmann.boltzmann(cityPosition,MAX_ITER = 1000,T0 = 100)
# 优化前城市图,路径图
Untils.drawScatter(cityPosition,flag=False)
Untils.drawPath(range(m),cityPosition)
# 显示优化后城市图,路径图
Untils.drawScatter(cityPosition,flag=False)
Untils.drawPath(bestx,cityPosition,color='b')
# 绘制误差趋势线
x0 = range(len(di));
Untils.TrendLine(x0,di)
输出结果如下: