# -*- coding: utf-8 -*-
"""
Created on Thu Dec 22 19:57:29 2016
周志华机器学习作业
@author: ZQ
"""
import numpy as np
# 数据读取
def loadData():
    """Read 'watermelon3.0.txt' (tab-separated), drop the header row and
    the leading id column, and return the remaining cells as a string array.
    """
    with open('watermelon3.0.txt') as f:
        rows = [line.strip().split('\t')[1:] for line in f]
    # rows[0] is the column-header line; skip it.
    return np.array(rows[1:])
# 将属性转为数字,返回转为的数字和属性对应的数字
def initData(data):
    """Encode the raw string dataset numerically.

    Columns 0-5 are categorical attributes mapped to integer codes 1..k,
    columns 6-7 are real-valued (density, sugar content), and the last
    column is the label ('是' -> 1, anything else -> 0).

    Bug fix: the original iterated a raw ``set`` to assign codes, which is
    nondeterministic across runs (hash randomization). Categories are now
    sorted first so the same input always yields the same encoding.

    Returns:
        label_list: list of 6 dicts, one per categorical column, mapping
            each category string to its integer code.
        Tdata: (n, 8) float array of encoded attribute columns.
        Ldata: (n,) float array of binary labels.
    """
    n, m = np.shape(data)
    data_num = np.zeros((n, m))
    label_list = []
    for i in range(6):
        # Sort so the code assignment is deterministic and reproducible.
        label_dir = {v: c for c, v in enumerate(sorted(set(data[:, i])), start=1)}
        label_list.append(label_dir)
        for j in range(n):
            data_num[j, i] = label_dir[data[j, i]]
    # The two numeric columns are parsed directly as floats.
    for i in range(6, 8):
        data_num[:, i] = data[:, i].astype(float)
    # Drop the (unencoded) label column from the feature matrix.
    Tdata = data_num[:, :-1]
    Ldata = (data[:, -1] == '是').astype(float)
    return label_list, Tdata, Ldata
def BP(Tdata,Ldata):
    """Train a single-hidden-layer network with standard (per-sample)
    error back-propagation: weights are updated after every example.

    Args:
        Tdata: (n_samples, n_features) float feature matrix.
        Ldata: (n_samples,) binary labels (1.0 = positive).

    Returns:
        (v, w, th_y, th_s): input->hidden weights, hidden->output weights,
        hidden-layer thresholds, output-layer threshold.
    """
    # number of samples and number of input attributes
    smple_n,parameter_n = np.shape(Tdata)
    # one output-layer neuron
    t = 1
    # input->hidden weights (the hidden layer has parameter_n+1 neurons)
    v = np.random.random((parameter_n,parameter_n+1))
    # hidden->output weights
    w = np.random.random((parameter_n+1,1))
    # hidden-layer and output-layer thresholds
    th_y = np.random.random(parameter_n + 1)
    th_s = np.random.random(t)
    # gradient terms of the error w.r.t. (w, th_s) and (v, th_y)
    G_j = np.zeros(t)
    E_h = np.zeros(parameter_n + 1)
    # learning rate, iteration counter, count of consecutive iterations with
    # (nearly) unchanged error, previous iteration's accumulated error
    xk = 1
    kn = 0
    sn = 0
    old_ey = 0
    while True:
        kn += 1
        # accumulated squared error of the current iteration
        ey = 0
        # hidden-layer pre-activations for all samples at once
        Alp = Tdata.dot(v)
        #sum_b = Alp.sum(axis = 0)
        bj = 1/(1 + np.exp(-(Alp[:] - th_y)))
        # forward + update, one sample at a time
        for i in range(np.shape(bj)[0]):
            Bet = bj[i].dot(w)
            yl = 1/(1 + np.exp(-(Bet - th_s)))
            ey += (Ldata[i] - yl)**2/2
            # output-layer gradient term g_j (book eq. 5.10)
            G_j = yl * (1-yl) * (Ldata[i] - yl)
            # NOTE(review): E_h is accumulated with += across samples and then
            # collapsed with a dot product; the textbook hidden-layer term
            # (eq. 5.15) is element-wise b_h*(1-b_h)*sum_j(w_hj*g_j) with no
            # carry-over between samples — confirm this is intended.
            E_h += w.dot(G_j)
            E_h = E_h.dot(bj[i])*(1-bj[i])
            # update w, th_s and v, th_y
            # NOTE(review): reshape(9,1) hard-codes 9 hidden units
            # (parameter_n+1 for the 8-feature watermelon data).
            w += (xk*G_j*bj[i]).reshape(9,1)
            th_s += -xk * G_j
            v += xk*np.mat(Tdata[i]).T*np.mat(E_h)
            th_y += -xk*E_h
        # stop once the accumulated error has stayed (nearly) unchanged
        # for 100 consecutive iterations
        if abs(old_ey - ey) < 0.0001:
            sn += 1
            if sn == 100:
                break
        else:
            old_ey = ey
            sn = 0
    print('迭代次数:',kn)
    return v,w,th_y,th_s
def sigmoid(x):
    """Logistic activation 1 / (1 + e^(-x)); works element-wise on arrays."""
    denom = 1 + np.exp(-x)
    return 1 / denom
def ABP(Tdata,Ldata):
    """Train a single-hidden-layer network with accumulated (batch)
    error back-propagation: gradients are summed over the whole training
    set, then the weights are updated once per iteration.

    Args:
        Tdata: (n_samples, n_features) float feature matrix.
        Ldata: (n_samples,) binary labels (1.0 = positive).

    Returns:
        (v, w, th_y, th_s): input->hidden weights, hidden->output weights,
        hidden-layer thresholds, output-layer threshold.
    """
    # number of samples and number of input attributes
    smple_n,parameter_n = np.shape(Tdata)
    # one output-layer neuron
    t = 1
    # input->hidden weights (the hidden layer has parameter_n+1 neurons)
    v = np.random.random((parameter_n,parameter_n+1))
    # hidden->output weights
    w = np.random.random((parameter_n+1,1))
    # hidden-layer and output-layer thresholds
    th_y = np.random.random(parameter_n + 1)
    th_s = np.random.random(t)
    # gradient terms of the accumulated error w.r.t. (w, th_s) and (v, th_y)
    G_j = np.zeros(t)
    E_h = np.zeros(parameter_n + 1)
    # learning rate, iteration counter, count of consecutive iterations with
    # (nearly) unchanged error, previous iteration's accumulated error
    tk = 1
    kn = 0
    sn = 0
    old_ey = 0
    while True:
        kn += 1
        # accumulated squared error of the current iteration
        ey = 0
        # hidden-layer activations for all samples at once
        Alp = Tdata.dot(v)
        #sum_b = Alp.sum(axis = 0)
        bj = 1/(1 + np.exp(-(Alp[:] - th_y)))
        # output-layer outputs for all samples
        Bet = bj.dot(w)
        yl = sigmoid(Bet-th_s)
        # per-iteration accumulators for the descent direction of each
        # of the four parameter groups (v, w, th_y, th_s)
        tv=np.zeros((parameter_n,parameter_n+1))
        tw=np.zeros((parameter_n+1,t))
        tthy=np.zeros(parameter_n+1)
        tthj=np.zeros(t)
        for i in range(np.shape(bj)[0]):
            ey += (Ldata[i]-yl[i])**2/2
            # output-layer gradient term g_j (book eq. 5.10)
            G_j = yl[i] * (1-yl[i]) * (Ldata[i] - yl[i])
            # NOTE(review): as in BP(), E_h accumulates across samples and is
            # collapsed with a dot product; the textbook term (eq. 5.15) is
            # element-wise b_h*(1-b_h)*sum_j(w_hj*g_j) per sample — confirm.
            E_h += w.dot(G_j)
            E_h = E_h.dot(bj[i])*(1-bj[i])
            tthy += (-1)*E_h
            tv += np.mat(Tdata[i]).T*np.mat(E_h)
            tthj += (-1)*G_j
            # NOTE(review): reshape(9,1) hard-codes 9 hidden units
            # (parameter_n+1 for the 8-feature watermelon data).
            tw += (G_j*bj[i]).reshape(9,1)
        # one batch update per iteration
        v += tk*tv
        w += tk*tw
        th_y += tk*tthy
        th_s += tk*tthj
        # stop once the accumulated error has stayed (nearly) unchanged
        # for 50 consecutive iterations
        if abs(old_ey - ey) < 0.0001:
            sn += 1
            if sn == 50:
                break
        else:
            old_ey = ey
            sn = 0
    print('迭代次数:',kn)
    return v,w,th_y,th_s
def testClass(data, label, BPFunc):
    """Train a network on (data, label) with the given trainer and print
    the network's output next to the true label for every training sample.

    Args:
        data: (n_samples, n_features) float feature matrix.
        label: (n_samples,) binary labels.
        BPFunc: trainer returning (v, w, th_y, th_s), e.g. BP or ABP.
    """
    v, w, thy, ths = BPFunc(data, label)
    predictions = []
    for sample in data:
        # forward pass: input -> hidden -> output
        hidden = sigmoid(sample.dot(v) - thy)
        out = sigmoid(np.mat(hidden) * np.mat(w) - ths)
        predictions.append(out)
    for pred, truth in zip(predictions, label):
        print(pred, '\t', truth)
if __name__ == '__main__':
    # Load the raw watermelon dataset, encode it numerically, then train
    # with standard back-propagation and print prediction vs. true label
    # for every training sample.
    data = loadData()
    label_list,Tdata,Ldata = initData(data)
    testClass(Tdata,Ldata,BP)
转自 http://blog.csdn.net/icefire_tyh/article/details/52106069，
原文采用 MATLAB 实现。
刚刚开始写，有不对的地方希望大家指出。
数据如下：
编号	色泽	根蒂	敲声	纹理	脐部	触感	密度	含糖量	好瓜
1	青绿	蜷缩	浊响	清晰	凹陷	硬滑	0.697	0.46	是
2	乌黑	蜷缩	沉闷	清晰	凹陷	硬滑	0.774	0.376	是
3	乌黑	蜷缩	浊响	清晰	凹陷	硬滑	0.634	0.264	是
4	青绿	蜷缩	沉闷	清晰	凹陷	硬滑	0.608	0.318	是
5	浅白	蜷缩	浊响	清晰	凹陷	硬滑	0.556	0.215	是
6	青绿	稍蜷	浊响	清晰	稍凹	软粘	0.403	0.237	是
7	乌黑	稍蜷	浊响	稍糊	稍凹	软粘	0.481	0.149	是
8	乌黑	稍蜷	浊响	清晰	稍凹	硬滑	0.437	0.211	是
9	乌黑	稍蜷	沉闷	稍糊	稍凹	硬滑	0.666	0.091	否
10	青绿	硬挺	清脆	清晰	平坦	软粘	0.243	0.267	否
11	浅白	硬挺	清脆	模糊	平坦	硬滑	0.245	0.057	否
12	浅白	蜷缩	浊响	模糊	平坦	软粘	0.343	0.099	否
13	青绿	稍蜷	浊响	稍糊	凹陷	硬滑	0.639	0.161	否
14	浅白	稍蜷	沉闷	稍糊	凹陷	硬滑	0.657	0.198	否
15	乌黑	稍蜷	浊响	清晰	稍凹	软粘	0.36	0.37	否
16	浅白	蜷缩	浊响	模糊	平坦	硬滑	0.593	0.042	否
17	青绿	蜷缩	沉闷	稍糊	稍凹	硬滑	0.719	0.103	否