吴恩达机器学习作业笔记 python——线性回归

最新推荐文章于 2022-07-20 16:00:12 发布

weixin_41852925

最新推荐文章于 2022-07-20 16:00:12 发布

阅读量261

点赞数 1

分类专栏：笔记 | 文章标签：python

本文链接：https://blog.csdn.net/weixin_41852925/article/details/107694983

版权

笔记专栏收录该内容

3 篇文章 0 订阅

订阅专栏

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Model to be fitted: y = theta0 + theta1 * x1

path = 'ex1data1.txt'
# No header row is read from the file (header=None); the two columns are
# named explicitly instead.
data = pd.read_csv(
    path,
    header=None,
    names=['Population', 'Profit'],
)
# The returned preview is discarded here; the call only produces visible
# output in an interactive session.
data.head()

# Scatter plot of Profit against Population. Nothing is displayed at this
# point: the plt.show() call that would render it here is left disabled.
data.plot(
    kind='scatter',
    x='Population',
    y='Profit',
    figsize=(12, 8),
)

def computeCost(X, y, theta):
    """Return the squared-error cost of a linear model.

    The predictions are ``X * theta.T``; the cost is the sum of the squared
    differences between predictions and ``y`` divided by twice the number
    of rows of ``X``.

    Args:
        X: matrix of inputs, one row per sample. ``*`` must act as a matrix
            product, as it does for ``np.matrix``.
        y: matrix of observed targets, compatible with ``X * theta.T``.
        theta: row matrix of model parameters.

    Returns:
        The scalar cost.
    """
    sample_count = len(X)
    residuals = (X * theta.T) - y
    # np.power(base, exponent): squares every residual element-wise.
    squared_residuals = np.power(residuals, 2)
    return np.sum(squared_residuals) / (2 * sample_count)


# Put a constant column named 'Ones' (value 1) in front of the training data
# so that theta0 is multiplied by 1 and acts as the intercept.
# DataFrame.insert(loc, column, value, allow_duplicates=False):
#   loc: integer position of the new column; 0 makes it the first column
#   column: label given to the new column
#   value: a scalar, array, Series, ...
#   allow_duplicates: True lets the new label repeat an existing one
data.insert(loc=0, column='Ones', value=1)

# Variable initialisation
cols = data.shape[1]  # number of columns in data
# iloc selects by position: rows before the comma, columns after it.
# X is every column except the last one and y is the last column alone; both
# are wrapped in np.matrix so that * means matrix multiplication.
X = np.matrix(data.iloc[:, :cols - 1].values)
y = np.matrix(data.iloc[:, cols - 1:].values)
theta = np.matrix([[0, 0]])  # one row, two columns: theta0 and theta1

print(computeCost(X, y, theta))

def gradientdecent(X, y, theta, alpha, iters):
    """Run batch gradient descent for linear regression.

    Every iteration computes the prediction error ``X * theta.T - y`` once
    and then moves all parameters simultaneously against the gradient of
    the squared-error cost.

    Args:
        X: np.matrix of inputs, one row per training sample and one column
            per parameter.
        y: np.matrix of targets with one row per training sample.
        theta: 1 x n np.matrix holding the starting parameters; it is not
            modified.
        alpha: learning rate.
        iters: number of iterations to run.

    Returns:
        A tuple ``(theta, cost)``: the parameters after the last iteration
        (the ``theta`` argument itself when ``iters`` is 0) and a 1-D array
        of length ``iters`` holding the cost measured after each update.

    Raises:
        ZeroDivisionError: if ``X`` has no rows.
    """
    cost = np.zeros(iters)
    step = alpha / len(X)  # learning rate divided by the number of samples
    for i in range(iters):
        error = (X * theta.T) - y
        # error.T * X is a 1 x n row whose j-th entry equals
        # sum(error * X[:, j]), where X[:, j] is column j only.
        # Building a new matrix on every iteration keeps the update
        # simultaneous and never writes into a matrix the caller holds.
        theta = theta - step * (error.T * X)
        cost[i] = computeCost(X, y, theta)
    return theta, cost



# Gradient-descent settings.
alpha = 0.01  # learning rate
iters = 1000  # number of iterations
g, cost = gradientdecent(X, y, theta, alpha, iters)
print(g)
print(computeCost(X, y, g))

# 100 evenly spaced Population values from the smallest to the largest
# observed one (np.linspace includes the end point by default); they are the
# x coordinates of the fitted line.
x = np.linspace(data.Population.min(), data.Population.max(), 100)
# g[0, 0] is theta0 and g[0, 1] is theta1, so f = theta0 + theta1 * x.
f = g[0, 0] + (g[0, 1] * x)

# fig is the figure object and ax the axes object that is drawn on; this is
# equivalent to fig = plt.figure() followed by ax = fig.add_subplot(1, 1, 1).
fig, ax = plt.subplots(figsize=(12, 8))

ax.plot(x, f, 'r', label='Prediction')  # fitted line, drawn in red
ax.scatter(data.Population, data.Profit, label='Training Data')  # raw samples
ax.legend(loc='upper left')  # same position as the numeric location code 2
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()

weixin_41852925

关注

1
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
吴恩达机器学习作业笔记 python——线性回归

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#设目标方程为 y=theta0+theta1*x1
path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
data.head()
data.plot(kind='scatter', x='Population', y='Prof.
复制链接

扫一扫