一:返回一个5阶单位矩阵
import numpy as np
def warmupExercise():
    """Build, print and return the 5x5 identity matrix.

    Returns:
        numpy.ndarray: the (5, 5) identity matrix produced by ``np.eye(5)``.
    """
    E5 = np.eye(5)
    print('这是一个五阶单位矩阵')
    print(E5)
    # Hand the matrix back as well, so callers can use it instead of only reading stdout.
    return E5


warmupExercise()
二:线性回归
1.含有一个变量,大意是:假如你是一个饭店老板,要在其他城市拓展业务。现有数据在ex1data1.txt中,第一列是人口,第二列是收益
导包
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
将数据读取,进行展示
# Load the single-variable data set; the column names are supplied here via names=.
data = pd.read_csv('ex1data1.txt', names=['Population', 'Profit'])
# Print the summary statistics explicitly: a bare expression is only echoed
# by a notebook (and only when it is the last line of a cell).
print(data.describe())
# Scatter plot of the raw data.
data.plot(x='Population', y='Profit', kind='scatter')
plt.show()
定义损失函数:
# Cost function.
# NOTE: `*` is matrix multiplication for np.matrix but element-wise
# multiplication for np.ndarray (np.multiply is element-wise for both).
# `@` is matrix multiplication for both, so it is used below to make the
# function independent of the container type.
def computeCost(X, y, theta):
    """Return the squared-error cost J(theta) of a linear model.

    J(theta) = sum((X @ theta.T - y) ** 2) / (2 * m), where m = len(X).

    Args:
        X: (m, n) design matrix (np.matrix, np.ndarray or array-like).
        y: m target values, as an (m, 1) column or a flat length-m sequence.
        theta: n parameters, as a (1, n) row or a flat length-n sequence.

    Returns:
        The cost as a NumPy float scalar.
    """
    # Work on plain float ndarrays with fixed orientations so that neither the
    # np.matrix `*` overload nor accidental broadcasting of a flat y can
    # change the meaning of the arithmetic.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    theta = np.asarray(theta, dtype=float).reshape(1, -1)
    residuals = X @ theta.T - y  # (m, 1) prediction errors
    return np.sum(np.square(residuals)) / (2 * len(X))
为了能够直接进行矩阵相乘,在数据最前面增加一列全为1的x0
# Prepend the x0 column (all ones) as the first column of the frame.
data.insert(loc=0, column='Ones', value=1)
将数据分割出来,0-1列是变量x,2列是y
# Every column except the last one goes into X; the last column alone is y.
cols = data.shape[1]
print(cols)
X = data.iloc[:, :cols - 1]
y = data.iloc[:, cols - 1:]
(上面 print(cols) 的输出:3)
# Preview the first rows of X and y; printed explicitly because a bare
# expression is discarded when the code runs as a script.
print(X.head())
print(y.head())
转化成matrix类型
# Convert the frames to np.matrix and start from theta = [0, 0] (one row, two columns).
X = np.matrix(X.values)
y = np.matrix(y.values)
theta = np.matrix([0, 0])
# Print the shapes as a sanity check; a bare tuple expression is discarded in a script.
print(X.shape, y.shape, theta.shape)
计算代价函数
# Cost for the initial theta; printed so the value is visible outside a notebook.
print(computeCost(X, y, theta))
设置梯度下降
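公式:$\theta_j := \theta_j - \alpha \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}$,其中 $h_\theta(x) = \theta^T x$,所有 $\theta_j$ 同时更新。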
def gradientDescent(X, y, theta, alpha, epoch):
    """Fit linear-regression parameters with batch gradient descent.

    Each iteration applies the vectorized update

        theta := theta - (alpha / m) * (X @ theta.T - y).T @ X

    and records the cost of the updated parameters, computed with the same
    formula as computeCost: sum(error ** 2) / (2 * m).

    Args:
        X: (m, n) design matrix (np.matrix or 2-D np.ndarray).
        y: (m, 1) column of targets.
        theta: (1, n) row of initial parameters; the caller's object is not
            modified.
        alpha: learning rate.
        epoch: number of iterations to run.

    Returns:
        (theta, cost): the final (1, n) parameters and a length-``epoch``
        ndarray whose i-th entry is the cost after iteration i.
    """
    m = X.shape[0]  # number of training examples
    cost = np.zeros(epoch)
    # `@` is matrix multiplication for both np.matrix and np.ndarray, whereas
    # `*` would silently become element-wise for ndarray inputs.
    error = X @ theta.T - y  # (m, 1) residuals for the current theta
    for i in range(epoch):
        theta = theta - ((alpha / m) * error.T) @ X
        # Residuals of the updated theta: they give this iteration's cost and
        # are reused as the gradient input of the next iteration, so X @ theta.T
        # is evaluated only once per step.
        error = X @ theta.T - y
        cost[i] = np.sum(np.power(error, 2)) / (2 * m)
    return theta, cost
设置学习率和迭代次数
# Hyper-parameters: learning rate and number of iterations.
alpha, epoch = 0.01, 1000
# Run gradient descent from the initial theta; keeps the final parameters and the per-iteration cost.
final_theta, cost = gradientDescent(X, y, theta, alpha=alpha, epoch=epoch)
计算最后的损失
# Cost for the fitted parameters; printed so the value is visible outside a notebook.
print(computeCost(X, y, final_theta))
绘制线性模型以及数据,直观地看出它的拟合。
np.linspace()在指定的间隔内返回均匀间隔的数字。
# Evaluate the fitted line on 100 evenly spaced values spanning the Population range.
x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = final_theta[0, 0] + (final_theta[0, 1] * x)  # predicted values

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(x, f, 'r', label='Prediction')
# Legend label spelling corrected (was 'Traing Data').
ax.scatter(data['Population'], data.Profit, label='Training Data')
ax.legend(loc=2)  # loc=2 places the legend in the upper-left corner
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
将cost绘制出来
# Plot the cost recorded at every gradient-descent iteration.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(np.arange(epoch), cost, 'r')
ax.set(
    xlabel='Iterations',
    ylabel='Cost',
    title='Error vs. Training Epoch',
)
plt.show()
2.多个变量:ex1data2.txt第一列是房子大小,第二列是房子卧室数量,第三列是房子价格。预测房价。
# Load the multi-variable data set; the column names are supplied here via names=.
path = 'ex1data2.txt'
data2 = pd.read_csv(path, names=['Size', 'Bedrooms', 'Price'])
# Preview the first rows; printed explicitly because a bare expression is
# discarded when the code runs as a script.
print(data2.head())
预处理步骤 - 特征归一化
# Standardise every column (including Price): subtract the column mean and
# divide by the column standard deviation.
data2 = (data2 - data2.mean()) / data2.std()
# Preview the normalised rows; printed so they are visible outside a notebook.
print(data2.head())
# Add the x0 column of ones as the first column.
data2.insert(0, 'Ones', 1)

# Set X2 (training data) and y2 (target variable): the last column is the target.
cols = data2.shape[1]
X2 = data2.iloc[:, 0:cols - 1]
y2 = data2.iloc[:, cols - 1:cols]

# Convert to matrices and initialise theta with three zeros.
X2 = np.matrix(X2.values)
y2 = np.matrix(y2.values)
theta2 = np.matrix(np.array([0, 0, 0]))

# Perform linear regression on the data set, reusing alpha and epoch from the
# single-variable run.
g2, cost2 = gradientDescent(X2, y2, theta2, alpha, epoch)

# Report the cost (error) of the model together with the fitted parameters;
# printed because a bare tuple expression is discarded in a script.
print(computeCost(X2, y2, g2), g2)
绘制代价函数
# Plot the cost recorded at every iteration of the multi-variable run.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(epoch), cost2, 'r')
ax.set(
    xlabel='Iterations',
    ylabel='Cost',
    title='Error vs. Training Epoch',
)
plt.show()
利用sklearn自带的线性回归
from sklearn import linear_model

# scikit-learn's input validation rejects np.matrix in current releases, so
# hand the estimator plain ndarrays (np.asarray is a no-op for ndarray input).
X_arr = np.asarray(X)
y_arr = np.asarray(y)

model = linear_model.LinearRegression()
model.fit(X_arr, y_arr)

x = X_arr[:, 1]  # second column of X, as a flat array
f = model.predict(X_arr).flatten()

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x, f, 'r', label='Prediction')
# Legend label spelling corrected (was 'Traning Data').
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)  # loc=2 places the legend in the upper-left corner
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
直接求解的方法
# Normal equation
def normalEpn(X, y):
    """Solve linear least squares in closed form with the normal equation.

    theta = pinv(X.T @ X) @ X.T @ y

    The pseudo-inverse is used rather than a plain inverse so that a singular
    X.T @ X (for example an all-zero or duplicated feature column) still
    yields a least-squares solution instead of raising LinAlgError.

    Args:
        X: (m, n) design matrix (np.matrix or 2-D np.ndarray).
        y: (m, 1) column of targets.

    Returns:
        The (n, 1) parameter column.
    """
    theta = np.linalg.pinv(X.T @ X) @ X.T @ y
    return theta
final_theta2 = normalEpn(X, y)
# Show the closed-form parameters next to the gradient-descent ones so the two
# can be compared; printed because bare expressions are discarded in a script.
print(final_theta2)
print(final_theta)