人工智能原理第一章总结
1.人工智能的定义
1.1第一个层面
智能地把某件特定的事情做好,在某个领域增强人类的智慧,这种方式又叫做智能增强
像人类一样能认知,思考,判断:模拟人类的智能
1.2第二个层面
监督学习、无监督学习、强化学习。
2.神经网络基本工作原理
2.1神经网络三大概念
反向传播、梯度下降、损失函数
2.1.1 线性反向传播
2.1.1.1 ch02, Level1代码理解阅读与测试
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
import numpy as np
def target_function(w,b):
    """Evaluate the small two-stage example network.

    The intermediate nodes are ``x = 2*w + 3*b`` and ``y = 2*b + 1``; the
    output is their product ``z = x*y``.

    Returns:
        The tuple ``(x, y, z)``.
    """
    node_x = 2 * w + 3 * b
    node_y = 2 * b + 1
    return node_x, node_y, node_x * node_y
def single_variable(w,b,t):
    """Adjust only ``b`` until ``target_function(w, b)`` outputs ``z`` close to ``t``.

    Each step divides the output error by the constant 63 (the value of
    ``2*x + 3*y`` at the script's starting point w=3, b=4) and subtracts the
    quotient from ``b``.  Progress is printed every iteration; nothing is
    returned.
    """
    print("\nsingle variable: b ----- ")
    tolerance = 1e-5
    while True:
        _, _, z = target_function(w, b)
        delta_z = z - t
        print("w=%f,b=%f,z=%f,delta_z=%f" % (w, b, z, delta_z))
        if abs(delta_z) < tolerance:
            # Close enough to the target: report the result and stop.
            print("done!")
            print("final b=%f" % b)
            return
        delta_b = delta_z / 63
        print("delta_b=%f" % delta_b)
        b = b - delta_b
def single_variable_new(w,b,t):
    """Adjust only ``b`` using a factor recomputed on every iteration.

    Unlike the fixed divisor in ``single_variable``, the divisor here is
    ``2*x + 3*y`` evaluated at the current ``(w, b)``.  Progress is printed
    every iteration; nothing is returned.
    """
    print("\nsingle variable new: b ----- ")
    tolerance = 1e-5
    while True:
        x, y, z = target_function(w, b)
        delta_z = z - t
        print("w=%f,b=%f,z=%f,delta_z=%f" % (w, b, z, delta_z))
        if abs(delta_z) < tolerance:
            # Close enough to the target: report the result and stop.
            print("done!")
            print("final b=%f" % b)
            return
        # dz/db of z = (2w+3b)(2b+1), refreshed at the current point.
        factor_b = 2 * x + 3 * y
        delta_b = delta_z / factor_b
        print("factor_b=%f, delta_b=%f" % (factor_b, delta_b))
        b = b - delta_b
# This version is kept as the intentionally flawed demo: it divides by the
# constants 63 and 18 on every iteration instead of recomputing the factors
# at the current point (compare double_variable_new).
def double_variable(w,b,t):
    """Adjust ``w`` and ``b`` together using fixed divisors 63 and 18.

    The output error is split evenly between the two variables (the final
    ``/2``).  Progress is printed every iteration; nothing is returned.
    """
    print("\ndouble variable: w, b -----")
    tolerance = 1e-5
    while True:
        _, _, z = target_function(w, b)
        delta_z = z - t
        print("w=%f,b=%f,z=%f,delta_z=%f" % (w, b, z, delta_z))
        if abs(delta_z) < tolerance:
            print("done!")
            print("final b=%f" % b)
            print("final w=%f" % w)
            return
        delta_b = delta_z / 63 / 2
        delta_w = delta_z / 18 / 2
        print("delta_b=%f, delta_w=%f" % (delta_b, delta_w))
        b = b - delta_b
        w = w - delta_w
# Corrected version: the factors are recomputed at every step.
def double_variable_new(w,b,t):
    """Adjust ``w`` and ``b`` together using factors from ``calculate_wb_factor``.

    The output error is split evenly between the two variables (the final
    ``/2``).  Progress is printed every iteration; nothing is returned.
    """
    print("\ndouble variable new: w, b -----")
    tolerance = 1e-5
    while True:
        x, y, z = target_function(w, b)
        delta_z = z - t
        print("w=%f,b=%f,z=%f,delta_z=%f" % (w, b, z, delta_z))
        if abs(delta_z) < tolerance:
            print("done!")
            print("final b=%f" % b)
            print("final w=%f" % w)
            return
        factor_b, factor_w = calculate_wb_factor(x, y)
        delta_b = delta_z / factor_b / 2
        delta_w = delta_z / factor_w / 2
        print(
            "factor_b=%f, factor_w=%f, delta_b=%f, delta_w=%f"
            % (factor_b, factor_w, delta_b, delta_w)
        )
        b = b - delta_b
        w = w - delta_w
def calculate_wb_factor(x,y):
    """Return ``(factor_b, factor_w)`` for the intermediate values ``x`` and ``y``.

    ``factor_b`` is ``2*x + 3*y`` and ``factor_w`` is ``2*y``.
    """
    return 2 * x + 3 * y, 2 * y
if __name__ == '__main__':
    # Starting weights and target output shared by all four demos.
    w, b, t = 3, 4, 150
    for demo in (single_variable, single_variable_new,
                 double_variable, double_variable_new):
        demo(w, b, t)
2.1.1.2 ch02, Level2代码理解阅读与测试
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
import numpy as np
import matplotlib.pyplot as plt
def draw_fun(X,Y):
    """Plot the forward curve, the visited points, and the derivative curve.

    Args:
        X: x-coordinates of the points to mark with 'x'.
        Y: y-coordinates of the points to mark with 'x'.
    """
    grid = np.linspace(1.2, 10)
    # c = sqrt(ln(x^2)), the same chain that forward() evaluates.
    curve = np.sqrt(np.log(grid * grid))
    plt.plot(grid, curve)
    plt.plot(X, Y, 'x')
    # dc/dx = 1 / (x * sqrt(ln(x^2)))
    slope = 1 / (grid * np.sqrt(np.log(grid**2)))
    plt.plot(grid, slope)
    plt.show()
def forward(x):
    """Run the forward chain ``a = x*x``, ``b = ln(a)``, ``c = sqrt(b)``.

    Returns:
        The tuple ``(a, b, c)``.
    """
    squared = x * x
    logged = np.log(squared)
    return squared, logged, np.sqrt(logged)
def backward(x,a,b,c,y):
    """Propagate the output error ``c - y`` back to ``x``.

    At each stage the error is divided by that stage's local derivative:
    ``dc/db = 1/(2*sqrt(b))``, ``db/da = 1/a``, ``da/dx = 2*x``.

    Returns:
        The tuple ``(loss, delta_x, delta_a, delta_b, delta_c)``.
    """
    loss = c - y
    delta_c = loss
    delta_b = delta_c * 2 * np.sqrt(b)   # divide by 1/(2*sqrt(b))
    delta_a = delta_b * a                # divide by 1/a
    delta_x = delta_a / 2 / x            # divide by 2*x
    return loss, delta_x, delta_a, delta_b, delta_c
def update(x, delta_x):
    """Return ``x - delta_x``, or 1.1 when that result is below 1."""
    stepped = x - delta_x
    # Below 1 the result is replaced by the fixed value 1.1.
    return 1.1 if stepped < 1 else stepped
if __name__ == '__main__':
    print("how to play: 1) input x, 2) calculate c, 3) input target number but not faraway from c")
    print("input x as initial number(1.2,10), you can try 1.3:")
    x = float(input())
    a, b, c = forward(x)
    print("c=%f" % c)
    print("input y as target number(0.5,2), you can try 1.8:")
    y = float(input())

    error = 1e-3
    X, Y = [], []  # every visited (x, c) pair, plotted at the end

    # At most 20 forward/backward rounds.
    for _ in range(20):
        print("forward...")
        a, b, c = forward(x)
        print("x=%f,a=%f,b=%f,c=%f" % (x, a, b, c))
        X.append(x)
        Y.append(c)

        print("backward...")
        loss, delta_x, delta_a, delta_b, delta_c = backward(x, a, b, c, y)
        if abs(loss) < error:
            print("done!")
            break

        x = update(x, delta_x)
        print("delta_c=%f, delta_b=%f, delta_a=%f, delta_x=%f\n" % (delta_c, delta_b, delta_a, delta_x))

    draw_fun(X, Y)
2.1.2 梯度下降
“梯度下降”包含了两层含义:
- 梯度:函数在当前位置上升最快的方向;
- 下降:与梯度(导数)相反的方向,用数学语言描述就是更新公式 $x_{n+1} = x_n - \eta \cdot \nabla J(x_n)$ 中的那个减号。
2.1.2.1 ch02, Level3代码理解阅读与测试
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
import numpy as np
import matplotlib.pyplot as plt
def target_function(x):
    """Return ``x`` squared, the curve that gradient descent minimises here."""
    return x * x
def derivative_function(x):
    """Return ``2*x``, the derivative of ``x*x``."""
    slope = 2 * x
    return slope
def draw_function():
    """Plot ``target_function`` over the interval [-1.2, 1.2]."""
    xs = np.linspace(-1.2, 1.2)
    plt.plot(xs, target_function(xs))
def draw_gd(X):
    """Plot the descent path: each visited ``x`` against ``target_function(x)``."""
    heights = [target_function(point) for point in X]
    plt.plot(X, heights)
if __name__ == '__main__':
    x = 1.2        # starting point
    eta = 0.3      # step size
    error = 1e-3   # stop once y is no longer above this
    X = [x]
    y = target_function(x)
    while y > error:
        # Step against the derivative.
        x = x - eta * derivative_function(x)
        X.append(x)
        y = target_function(x)
        print("x=%f, y=%f" % (x, y))

    draw_function()
    draw_gd(X)
    plt.show()
2.1.3 损失函数
2.1.3.1 均方差损失函数
该函数是最直观的一个损失函数,它计算预测值和真实值之间的欧氏距离。预测值和真实值越接近,两者的均方差就越小。
2.1.3.1.1 ch03, Level1代码理解阅读与测试
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import LogNorm
file_name = "../data/ch03.npz"
def TargetFunction(x,w,b):
    """Return the linear model output ``w*x + b``."""
    return w * x + b
def CreateSampleData(w,b,n):
    """Load the cached sample set, or generate and cache a new one.

    If the file named by ``file_name`` exists, its ``data`` and ``label``
    arrays are returned as-is (``w``, ``b`` and ``n`` are then ignored).
    Otherwise ``n`` points evenly spaced on [0, 1] are generated, labelled
    with ``TargetFunction(x, w, b)`` plus uniform noise in [-0.5, 0.5), saved
    to ``file_name`` and returned.

    Returns:
        The tuple ``(x, y)`` of sample inputs and labels.
    """
    file = Path(file_name)
    if file.exists():
        # np.load on an .npz returns an NpzFile that keeps the archive open;
        # the context manager closes it once both arrays have been read.
        with np.load(file) as data:
            x = data["data"]
            y = data["label"]
    else:
        x = np.linspace(0, 1, num=n)
        noise = np.random.uniform(-0.5, 0.5, size=n)
        y = TargetFunction(x, w, b) + noise
        # Create the target directory first so a fresh checkout does not
        # fail on the save.
        file.parent.mkdir(parents=True, exist_ok=True)
        np.savez(file_name, data=x, label=y)
    return x, y
def CostFunction(x,y,z,count):
    """Return the halved mean squared error between ``z`` and ``y``.

    Args:
        x: unused; kept so all helpers share the same call shape.
        y: label values.
        z: predicted values.
        count: number of samples used as the divisor.
    """
    return ((z - y) ** 2).sum() / count / 2
def ShowResult(ax,x,y,a,loss,title):
    """Draw the samples and a fitted line on ``ax``, titled with the loss.

    Args:
        ax: object providing ``scatter``, ``plot`` and ``set_title``.
        x, y: sample coordinates for the scatter plot.
        a: line values plotted against ``x`` in red.
        loss: loss value appended to the title.
        title: text placed before the loss in the title.
    """
    ax.scatter(x, y)
    ax.plot(x, a, 'r')
    ax.set_title("{0} Loss={1:01f}".format(title, loss))
# Show how the loss changes when only b varies.
def CalculateCostB(x,y,n,w,b):
    """Plot the loss for ``b`` swept over [b-1, b+1) in steps of 0.05, ``w`` fixed."""
    B = np.arange(b - 1, b + 1, 0.05)
    Loss = [CostFunction(x, y, w * x + bias, n) for bias in B]
    plt.title("Loss according to b")
    plt.xlabel("b")
    plt.ylabel("J")
    plt.plot(B, Loss, 'x')
    plt.show()
# Show how the loss changes when only w varies.
def CalculateCostW(x,y,n,w,b):
    """Plot the loss for ``w`` swept over [w-1, w+1) in steps of 0.05, ``b`` fixed.

    Args:
        x, y: sample inputs and labels.
        n: sample count passed to ``CostFunction``.
        w: centre of the sweep.
        b: bias held constant during the sweep.
    """
    W = np.arange(w - 1, w + 1, 0.05)
    Loss = [CostFunction(x, y, weight * x + b, n) for weight in W]
    # The title is set by calling plt.title(); the original also assigned a
    # string to plt.title, which replaced the pyplot function and would make
    # every later plt.title(...) call fail.
    plt.title("Loss according to w")
    plt.xlabel("w")
    plt.ylabel("J")
    plt.plot(W, Loss, 'o')
    plt.show()
# Show how the loss changes when w and b vary together.
def CalculateCostWB(x,y,n,w,b):
    """Plot the loss surface over a grid of ``(w, b)`` values.

    ``w`` is swept over [w-10, w+10) and ``b`` over [b-10, b+10), both in
    steps of 0.1.

    Args:
        x, y: sample inputs and labels.
        n: sample count passed to ``CostFunction``.
        w, b: centre of the grid.
    """
    W = np.arange(w - 10, w + 10, 0.1)
    B = np.arange(b - 10, b + 10, 0.1)
    Loss = np.zeros((len(W), len(B)))
    # Separate loop names keep the w/b parameters intact.
    for i, w_i in enumerate(W):
        for j, b_j in enumerate(B):
            Loss[i, j] = CostFunction(x, y, w_i * x + b_j, n)

    # plot_surface needs 2-D coordinate grids. With indexing='ij',
    # W_grid[i, j] == W[i] and B_grid[i, j] == B[j], matching Loss[i, j].
    W_grid, B_grid = np.meshgrid(W, B, indexing='ij')

    fig = plt.figure()
    # Figure.gca() no longer accepts a projection keyword; add_subplot does.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(W_grid, B_grid, Loss)
    plt.show()
# Show the loss for four values of b, one subplot each, in a single figure.
def show_cost_for_4b(x,y,n,w,b):
    """Draw a 2x2 grid of fits for bias ``b-1``, ``b-0.5``, ``b`` and ``b+0.5``.

    The subplot titles are fixed strings written for w=2, b=3.
    """
    fig, axes = plt.subplots(2, 2)
    # Amount subtracted from b for each panel, in row-major panel order.
    shifts = (1, 0.5, 0, -0.5)
    titles = ("z=2x+2", "z=2x+2.5", "z=2x+3", "z=2x+3.5")
    for ax, shift, title in zip(axes.ravel(), shifts, titles):
        line = w * x + b - shift
        ShowResult(ax, x, y, line, CostFunction(x, y, line, n), title)
    plt.show()
# Compare four values of b on a single plot.
def show_all_4b(x,y,n,w,b):
    """Overlay the lines for bias ``b-1``, ``b-0.5``, ``b`` and ``b+0.5`` on the samples.

    Args:
        x, y: sample inputs and labels drawn as a scatter plot.
        n: unused; kept so the signature matches the sibling helpers.
        w: slope shared by all four lines.
        b: reference bias.
    """
    plt.scatter(x, y)
    # The original also computed a loss per line but never used it; that
    # dead work is dropped here.
    for shift in (-1, -0.5, 0, 0.5):
        plt.plot(x, w * x + b + shift)
    plt.show()
def show_3d_surface(x,y,m,w,b):
    """Plot the loss surface over ``w ± 2`` and ``b ± 2`` on a 50x50 grid.

    Args:
        x, y: sample inputs and labels, each holding ``m`` values.
        m: number of samples.
        w, b: centre of the grid.
    """
    fig = plt.figure()
    # add_subplot attaches the 3-D axes to the figure; a bare Axes3D(fig) is
    # not added automatically on recent Matplotlib releases.
    ax = fig.add_subplot(111, projection='3d')

    X = x.reshape(m, 1)
    Y = y.reshape(m, 1)

    steps_w = 50
    steps_b = 50
    W, B = np.meshgrid(np.linspace(w - 2, w + 2, steps_w),
                       np.linspace(b - 2, b + 2, steps_b))

    # One column of Z per grid point: predictions for all m samples.
    Z = np.dot(X, W.reshape(1, -1)) + B.reshape(1, -1)
    loss_per_point = ((Z - Y) ** 2).sum(axis=0) / m / 2
    # Reshape to the meshgrid's own shape so the surface stays aligned even
    # if the two step counts are changed independently.
    loss_grid = loss_per_point.reshape(W.shape)

    ax.plot_surface(W, B, loss_grid, norm=LogNorm(), cmap='rainbow')
    plt.show()
def test_2d(x,y,m,w,b):
    """Approximate a contour plot by grouping grid points with similar loss.

    A 200x200 grid over ``w ± 2`` and ``b ± 2`` is filled with losses rounded
    to two decimals.  Repeatedly, the first unconsumed cell sets a reference
    loss; every cell equal to it or within 2% of it is plotted as one series
    and marked consumed by setting it to 0.  Cells whose rounded loss is
    exactly 0 therefore count as consumed from the start.
    """
    s = 200
    W = np.linspace(w - 2, w + 2, s)
    B = np.linspace(b - 2, b + 2, s)
    LOSS = np.zeros((s, s))
    for i, w_i in enumerate(W):
        for j, b_j in enumerate(B):
            LOSS[i, j] = round(CostFunction(x, y, w_i * x + b_j, m), 2)
    print(LOSS)
    print("please wait for 20 seconds...")

    while True:
        band_w, band_b = [], []
        level = None  # reference loss of this band; None until one is found
        for i, w_i in enumerate(W):
            for j, b_j in enumerate(B):
                cell = LOSS[i, j]
                if cell == 0:
                    continue  # already consumed
                if level is None:
                    level = cell
                elif not (cell == level or abs(level / cell - 1) < 0.02):
                    continue  # belongs to a different band
                band_w.append(w_i)
                band_b.append(b_j)
                LOSS[i, j] = 0
        if level is None:
            # No unconsumed cell is left.
            break
        plt.plot(band_w, band_b, '.')

    plt.xlabel("w")
    plt.ylabel("b")
    plt.show()
def draw_contour(x,y,m,w,b):
    """Draw log-spaced loss contours over ``w ± 2`` and ``b ± 2`` on a 50x50 grid.

    Args:
        x, y: sample inputs and labels, each holding ``m`` values.
        m: number of samples.
        w, b: centre of the grid.
    """
    X = x.reshape(m, 1)
    Y = y.reshape(m, 1)

    steps_w = 50
    steps_b = 50
    W, B = np.meshgrid(np.linspace(w - 2, w + 2, steps_w),
                       np.linspace(b - 2, b + 2, steps_b))

    # One column of Z per grid point: predictions for all m samples.
    Z = np.dot(X, W.reshape(1, -1)) + B.reshape(1, -1)
    loss_per_point = ((Z - Y) ** 2).sum(axis=0) / m / 2
    # Reshape to the meshgrid's own shape so the contours stay aligned even
    # if the two step counts are changed independently.
    loss_grid = loss_per_point.reshape(W.shape)

    plt.contour(W, B, loss_grid, levels=np.logspace(-5, 5, 50),
                norm=LogNorm(), cmap=plt.cm.jet)
    plt.show()
if __name__ == '__main__':
    m = 50   # sample count
    w = 2    # slope used to generate and centre the plots
    b = 3    # bias used to generate and centre the plots

    x, y = CreateSampleData(w, b, m)
    plt.scatter(x, y)
    plt.show()

    # CalculateCostWB is left out of the run, as in the original script.
    demos = (
        show_cost_for_4b,
        show_all_4b,
        CalculateCostB,
        CalculateCostW,
        show_3d_surface,
        draw_contour,
        test_2d,
    )
    for demo in demos:
        demo(x, y, m, w, b)
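2.1.3.2 交叉熵损失函数
交叉熵(Cross Entropy)是Shannon信息论中的一个重要概念,主要用于度量两个概率分布间的差异性信息。在信息论中,交叉熵用来表示两个概率分布 $p,q$ 的差异,其中 $p$ 表示真实分布,$q$ 表示预测分布,那么 $H(p,q)$ 就称为交叉熵:
$$H(p,q)=\sum_i p_i \ln \frac{1}{q_i} = -\sum_i p_i \ln q_i$$
对于二分类问题(标签 $y \in \{0,1\}$,预测值为 $a$),上式化为 $loss = -[y \ln a + (1-y) \ln (1-a)]$,即下面代码中的 target_function2。
2.1.3.2.1 ch03, Level2代码理解阅读与测试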
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
import numpy as np
import matplotlib.pyplot as plt
def target_function2(a,y):
    """Return the binary cross-entropy ``-y*ln(a) - (1-y)*ln(1-a)``.

    Args:
        a: predicted value(s); the logarithms are taken of ``a`` and ``1-a``.
        y: label weighting the two terms.
    """
    positive_term = y * np.log(a)
    negative_term = (1 - y) * np.log(1 - a)
    return -positive_term - negative_term
if __name__ == '__main__':
    err = 1e-2  # keep a away from 0 and 1, where the logarithm is undefined
    a = np.linspace(0 + err, 1 - err)

    # One loss curve per label value.
    y = 0
    z1 = target_function2(a, y)
    y = 1
    z2 = target_function2(a, y)

    p1, = plt.plot(a, z1)
    p2, = plt.plot(a, z2)
    plt.grid()
    plt.legend([p1, p2], ["y=0", "y=1"])
    plt.xlabel("a")
    plt.ylabel("Loss")
    plt.show()