# ===== Part 1: Univariate linear regression (单变量) =====
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def costFunction(X, Y, theta):
    """Mean-squared-error cost J(theta) = sum((X@theta - Y)^2) / (2m).

    X: (m, n) design matrix (first column of ones for the bias term).
    Y: (m, 1) target column vector.
    theta: (n, 1) parameter column vector.
    """
    residual = X @ theta - Y  # X @ theta is equivalent to X.dot(theta)
    return (residual ** 2).sum() / (2 * len(X))
def gradientDescent(X, Y, theta, alpha, iters):
    """Batch gradient descent for linear regression.

    Returns (theta, costs) where costs holds the cost value recorded
    after each of the `iters` parameter updates.
    """
    costs = []
    m = len(X)
    for _ in range(iters):
        # Gradient of the MSE cost: X^T (X theta - Y) / m
        theta = theta - alpha * X.T @ (X @ theta - Y) / m
        costs.append(costFunction(X, Y, theta))  # track convergence
    return theta, costs
# --- 1. Load and prepare the data ---
# Raw string so the backslashes in the Windows path are never treated as escapes.
data = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data1.txt",
                   names=["popalution", "profit"])
# data.head() shows the first five rows
data.insert(0, 'ones', 1)  # bias column of ones for the intercept term

X = data.iloc[:, 0:-1]  # every column except the last = features
Y = data.iloc[:, -1]    # last column = target
X = np.array(X)  # convert DataFrame to ndarray
Y = np.array(Y)
Y = Y.reshape(-1, 1)  # column vector; -1 adapts to any sample count (was hard-coded 97)

# --- 2. Fit by gradient descent ---
alpha = 0.01
iters = 1000
theta = np.zeros((2, 1))
cost_init = costFunction(X, Y, theta)
g, cost = gradientDescent(X, Y, theta, alpha, iters)
# g ≈ [[-3.24140214], [1.1272942]] — compare against the normal-equation theta

# --- 3. Cost curve ---
fig, ax = plt.subplots()
ax.plot(np.arange(iters), cost)
ax.set(xlabel='iters', ylabel='cost', title='cost vs iters')
plt.show()

# --- 4. Fitted line over the training data ---
# BUGFIX: the line must span the population (feature) range X[:, 1],
# not the profit (target) range Y.min()..Y.max().
x = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)
y = g[0, 0] + g[1, 0] * x
fig, ax = plt.subplots()
ax.scatter(X[:, 1], Y, label='training data')
ax.plot(x, y, 'r', label='predict')
ax.legend()
ax.set(xlabel='population', ylabel='profit')
plt.show()
# ===== Part 2: Multivariate linear regression (多变量) =====
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# 2. Feature normalization (z-score)
def normalize_feature(data):
    """Z-score each column: (value - column mean) / column std (pandas ddof=1)."""
    mu = data.mean()
    sigma = data.std()
    return data.sub(mu).div(sigma)
def costFunction(X, Y, theta):
    """Average squared-error cost, J = sum((X@theta - Y)^2) / (2m)."""
    m = len(X)
    errors = X @ theta - Y  # X @ theta is equivalent to X.dot(theta)
    return np.sum(errors * errors) / (2 * m)
def gradientDescent(X, Y, theta, alpha, iters):
    """Batch gradient descent.

    NOTE: unlike the Part-1 version, this one returns (costs, theta).
    """
    m = len(X)
    costs = []
    for _ in range(iters):
        # Step against the gradient X^T (X theta - Y) / m
        theta = theta - alpha * X.T @ (X @ theta - Y) / m
        costs.append(costFunction(X, Y, theta))
    return costs, theta
# 1. Load the data
# Raw string so the Windows-path backslashes are never treated as escapes.
data = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data2.txt",
                   names=["size", "bedrooms", "price"])
# 2. Normalize features (and target) to zero mean / unit std
data = normalize_feature(data)
data.insert(0, "one", 1)  # bias column

# 4. Build the data set by slicing
X = data.iloc[:, 0:-1]
Y = data.iloc[:, -1]
X = np.array(X)
Y = np.array(Y)
Y = Y.reshape(-1, 1)  # column vector; -1 adapts to any sample count (was hard-coded 47)

# Compare convergence for several learning rates.
alphas = [0.0003, 0.003, 0.03]
iters = 1000
theta = np.zeros((3, 1))  # gradientDescent does not mutate theta, so each run starts from zeros
cost_init = costFunction(X, Y, theta)
fig, ax = plt.subplots()
for alpha in alphas:
    cost, g = gradientDescent(X, Y, theta, alpha, iters)
    ax.plot(np.arange(iters), cost, label=alpha)
ax.legend()
ax.set(xlabel="iters", ylabel="cost", title="cost vs iters")
plt.show()
"""
# 3. Data visualization
data.plot.scatter("size","price",label="size")
plt.show()
data.plot.scatter("bedrooms","price",label="bedrooms")
plt.show()
"""
# ===== Part 3: Normal equation (正规方程法) =====
import numpy as np
#import matplotlib.pyplot as plt
import pandas as pd
def normal_Equation(X, Y):
    """Closed-form least squares: theta = (X^T X)^+ X^T Y.

    Uses the Moore–Penrose pseudo-inverse instead of np.linalg.inv, so the
    computation also succeeds when X^T X is singular (e.g. linearly
    dependent features); for the full-rank case the result is identical.

    X: (m, n) design matrix, Y: (m, 1) targets. Returns (n, 1) theta.
    """
    theta = np.linalg.pinv(X.T @ X) @ X.T @ Y
    return theta
# Load the same univariate data set and solve in closed form.
# Raw string so the Windows-path backslashes are never treated as escapes.
data = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data1.txt",
                   names=["popalution", "profit"])
# data.head() shows the first five rows
data.insert(0, 'ones', 1)  # bias column of ones

X = data.iloc[:, 0:-1]
Y = data.iloc[:, -1]
X = np.array(X)  # convert DataFrame to ndarray
Y = np.array(Y)
Y = Y.reshape(-1, 1)  # column vector; -1 adapts to any sample count (was hard-coded 97)
theta = normal_Equation(X, Y)
print(theta)