1 输出一个5*5的单位矩阵
import numpy as np
# Build the 5x5 identity matrix (ones on the main diagonal, zeros elsewhere)
# and print it.
a = np.identity(5)
print(a)
结果:
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
2 单变量的线性回归
第2部分的任务是:根据城市人口数量,预测在该城市开小吃店的利润。
数据在ex1data1.txt里,第一列是城市人口数量,第二列是该城市小吃店利润。
2.1画出数据
import matplotlib.pyplot as plt
# Load the data set: every non-empty line is "population,profit".
# The path is a raw string so the Windows backslashes stay literal instead of
# depending on "\e", "\p" and "\w" not being recognised escape sequences.
x = []  # city populations
y = []  # profits of the snack shop in each city
with open(r"E:\exer\python\wex1\ex1data1.txt", "r") as ex:
    for line in ex:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split(",")
        # The file yields strings; convert to float so the points can be plotted.
        x.append(float(fields[0]))
        y.append(float(fields[1]))

# Scatter plot of profit against population.
plt.scatter(x, y, s=5, c='#FF69B4', alpha=0.8)
plt.xlabel("population")
plt.ylabel("profit")
# Save before show(): saving after show() was observed to produce a blank image.
plt.savefig("ex1.jpg")
plt.show()
在练习这一部分时遇到的问题是:数据读入列表后画不出来,找了很多参考资料都没能解决。后来意识到,从文件中读取到的数据是字符串类型,需要先转换成浮点数才能绘图。
此外,在保存图片时,savefig 要写在 show 前面,否则保存下来的图片会是一片空白(show 的窗口关闭后,当前图像已被清空,此时再调用 savefig 保存的是一张新的空白图)。
结果图:
2.2梯度下降
import matplotlib.pyplot as plt
import random
# Single-variable linear regression fitted with batch gradient descent,
# written with plain Python loops.

# Load the data set: every non-empty line is "population,profit".
# The path is a raw string so the Windows backslashes stay literal.
x = []
y = []
with open(r"E:\exer\python\wex1\ex1data1.txt", "r") as ex:
    for line in ex:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split(",")
        x.append(float(fields[0]))
        y.append(float(fields[1]))

n_samples = len(x)
if n_samples == 0:
    # The cost and gradients below divide by the sample count.
    raise ValueError("ex1data1.txt contains no data rows")

# Model: h(x) = a * x + b, with a reproducible random starting point.
random.seed(1000)
a = random.random()
b = random.random()
m = 0      # iteration counter
q = 0.01   # learning rate
iters = []  # iteration numbers, x-axis of the cost curve
Jp = []     # cost recorded at each iteration
while m <= 2000:
    J0 = 0  # sum of squared errors
    J1 = 0  # sum of error * x  (gradient numerator for a)
    J2 = 0  # sum of errors     (gradient numerator for b)
    for xi, yi in zip(x, y):
        err = a * xi + b - yi
        J0 += err ** 2
        J1 += err * xi
        J2 += err
    J = J0 / (2 * n_samples)
    m += 1
    # Both updates use gradients computed from the same (old) a and b.
    a = a - q * J1 / n_samples
    b = b - q * J2 / n_samples
    iters.append(m)
    Jp.append(J)

# Predictions of the fitted line at every training point.
h_x1 = [a * xi + b for xi in x]

# Left panel: cost versus iteration.
plt.subplot(1, 2, 1)
plt.plot(iters, Jp)
plt.xlabel("iters")
plt.ylabel("lost")
# Right panel: data with the fitted line.
plt.subplot(1, 2, 2)
plt.scatter(x, y)
plt.plot(x, h_x1, c='#FF69B4')
plt.xlabel("x")
plt.ylabel("y")
# Save before show() so the written image is not blank.
plt.savefig("ex1")
plt.show()
print(a, b)
结果图:
review:
import matplotlib.pyplot as plt
import random
import numpy as np
# Review: the same single-variable gradient descent, vectorised with NumPy.

# read data: every non-empty line is "population,profit".
# The path is a raw string so the Windows backslashes stay literal.
population = []
profit = []
with open(r"E:\exer\python\exercise1\ex1data1.txt", "r") as doc:
    for line in doc:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split(",")
        population.append(float(fields[0]))
        profit.append(float(fields[1]))
m = len(population)
if m == 0:
    # The cost and gradients below divide by the sample count.
    raise ValueError("ex1data1.txt contains no data rows")

# Convert once so the training loop does not rebuild arrays on every iteration.
population = np.array(population)
profit = np.array(profit)

# scatter data (optional preview, disabled):
# plt.scatter(population, profit)
# plt.xlabel("population")
# plt.ylabel("profit")
# plt.show()

# initiate the parameters of the model y = a * population + b
random.seed(100)
a = random.random()
b = random.random()

# forward pass and parameter update
l = 0.01  # step size (learning rate)
iteration = 1000
J = []  # cost recorded at each iteration
for _ in range(iteration):
    y = a * population + b
    residual = y - profit
    J.append(np.dot(residual, residual) / (2 * m))
    # Both updates use the residual computed from the same (old) a and b.
    a = a - l * np.dot(residual, population) / m
    b = b - l * sum(residual) / m

plt.subplot(1, 2, 1)
# plot loss function
plt.plot(range(0, iteration), J)
# plot the result
plt.subplot(1, 2, 2)
plt.scatter(population, profit, c="#FF69B4")
x = np.arange(0, int(max(population)))
result = a * x + b
plt.plot(x, result)
plt.show()
3.多元线性回归
import numpy as np
import random
import matplotlib.pyplot as plt
# Not referenced directly below; kept because Matplotlib releases before 3.2
# only register the "3d" projection when this module has been imported.
from mpl_toolkits.mplot3d import Axes3D

# Multivariate linear regression (house size, number of bedrooms -> price)
# fitted with batch gradient descent on standardised data.

alpha = 0.1  # learning rate

# read data: every non-empty line is "house_size,bedrooms,price".
# The path is a raw string so the Windows backslashes stay literal.
rows = []
prices = []
with open(r"E:\exer\python\wex1\ex1data2.txt", "r") as exer:
    for line in exer:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split(",")
        # The leading 1 is the bias column multiplied by the intercept.
        rows.append([1.0, float(fields[0]), float(fields[1])])
        prices.append(float(fields[2]))
lens = len(rows)
if lens == 0:
    # Mean/std normalisation and the cost below are undefined without samples.
    raise ValueError("ex1data2.txt contains no data rows")
feature = np.array(rows)                   # shape (lens, 3)
price = np.array(prices).reshape(-1, 1)    # shape (lens, 1)

# Reproducible random starting point for the three parameters.
random.seed(100)
a = random.random()
b = random.random()
c = random.random()

# data normalization: standardise the two real features and the target.
# Work on a copy so the raw `feature` array is left untouched, and skip
# column 0 because the bias column must stay equal to 1.
feature1 = feature.copy()
for i in range(1, 3):
    feature1[:, i] = (feature1[:, i] - np.mean(feature1[:, i])) / np.std(feature1[:, i])
price1 = (price - np.mean(price)) / np.std(price)

# Gradient descent on the parameter column vector q = [a, b, c]^T.
q = np.array([[a], [b], [c]])
Q = []  # cost recorded at each iteration
I = []  # iteration indices
n_iters = 81  # same count as the original `while i <= 80` starting from 0
for i in range(n_iters):
    h = np.dot(feature1, q)          # predictions, shape (lens, 1)
    residual = h - price1
    # mean(r^2) / 2 equals sum(r^2) / (2 * lens)
    J = float(np.mean(residual ** 2) / 2)
    Q.append(J)
    I.append(i)
    q = q - alpha * np.dot(feature1.T, residual) / lens
h = np.dot(feature1, q)  # predictions of the fitted model

# 3D view: standardised samples and the fitted plane.
fig1 = plt.figure()
# add_subplot attaches the 3D axes to the figure; constructing Axes3D(fig1)
# directly no longer does so on recent Matplotlib and leaves the figure empty.
ax1 = fig1.add_subplot(111, projection="3d")
ax1.scatter(feature1[:, 1], feature1[:, 2], price1[:, 0], c='r')
xx = np.arange(-2, 2, 0.1)
yy = np.arange(-2, 2, 0.1)
X, Y = np.meshgrid(xx, yy)
# Draw the plane from the trained parameters in q; a, b and c still hold the
# random initial values and would show an unfitted plane.
Z = q[0, 0] + q[1, 0] * X + q[2, 0] * Y
ax1.plot_surface(X, Y, Z, rstride=1, cstride=1)
ax1.set_xlabel("house_size")
ax1.set_ylabel("num of bedrooms")
ax1.set_zlabel("price")
plt.show()

# Cost versus iteration.
plt.plot(I, Q)
plt.xlabel("iteration")
plt.ylabel("J")
plt.show()