Assignment 1: Linear Regression

Single variable

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

def costFunction(X, Y, theta):
    inner = np.power(X @ theta - Y, 2)  # X @ theta is equivalent to X.dot(theta)
    return np.sum(inner) / (2 * len(X))

def gradientDescent(X, Y, theta, alpha, iters):
    costs = []
    for i in range(iters):
        theta = theta - alpha * X.T @ (X @ theta - Y) / len(X)
        cost = costFunction(X, Y, theta)
        costs.append(cost)  # record the cost at every iteration

    return theta, costs
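For reference, these two functions implement the standard least-squares cost and batch gradient-descent update, with m the number of training examples:

J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( x^{(i)}\theta - y^{(i)} \right)^2, \qquad \theta := \theta - \frac{\alpha}{m} X^{\top} (X\theta - Y)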

data = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data1.txt", names=["population", "profit"])
# print(data.head())  # inspect the first five rows
data.insert(0, 'ones', 1)  # add the bias column
# print(data.head())
X = data.iloc[:, 0:-1]
# print(X.head())
Y = data.iloc[:, -1]
X = np.array(X)  # convert the DataFrame to a NumPy array
Y = np.array(Y)
Y = Y.reshape(-1, 1)  # column vector, shape (97, 1)

alpha = 0.01
iters = 1000

theta = np.zeros((2, 1))
cost_init = costFunction(X, Y, theta)
g, cost = gradientDescent(X, Y, theta, alpha, iters)
# print(g)  # [[-3.24140214], [1.1272942]]; compare theta with the normal-equation result below
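As a quick sanity check (my addition, not part of the assignment), the same problem can be solved exactly with NumPy's least-squares routine; g should approach this value as the iteration count grows:

theta_ls, *_ = np.linalg.lstsq(X, Y, rcond=None)  # exact least-squares solution
print(theta_ls)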

# Visualization: cost curve and fitted line
fig,ax = plt.subplots()
ax.plot(np.arange(iters),cost)
ax.set(xlabel='iters',ylabel='cost',title='cost vs iters')
plt.show()


x = np.linspace(X[:, 1].min(), X[:, 1].max(), 100)  # span the population axis, not the profit axis
y = g[0, 0] + g[1, 0] * x
fig,ax = plt.subplots()
ax.scatter(X[:,1],Y,label='training data')
ax.plot(x,y,'r',label='predict')
ax.legend()
ax.set(xlabel='population',ylabel='profit')
plt.show()
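With the fitted parameters, predicting for a new input is just evaluating the line. A minimal sketch (7.0 is an arbitrary example value on the same scale as the training populations):

population = 7.0  # hypothetical input
profit_pred = g[0, 0] + g[1, 0] * population
print(profit_pred)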

Multiple variables

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# 2. Feature normalization (z-score: subtract the mean, divide by the standard deviation)
def normalize_feature(data):
    return (data - data.mean()) / data.std()
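A toy demonstration of normalize_feature (my example, not from the assignment): every column ends up with mean 0 and standard deviation 1.

demo = pd.DataFrame({"size": [2104, 1600, 2400], "bedrooms": [3, 3, 4]})
print(normalize_feature(demo))  # both columns now have mean 0 and std 1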

def costFunction(X, Y, theta):
    inner = np.power(X @ theta - Y, 2)  # X @ theta is equivalent to X.dot(theta)
    return np.sum(inner) / (2 * len(X))

def gradientDescent(X, Y, theta, alpha, iters):
    costs = []
    for i in range(iters):
        theta = theta - alpha * X.T @ (X @ theta - Y) / len(X)
        cost = costFunction(X, Y, theta)
        costs.append(cost)

    return costs, theta  # note: returns (costs, theta), unlike the single-variable version

# 1. Read the data
data = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data2.txt", names=["size", "bedrooms", "price"])
# print(data.head())
data = normalize_feature(data)
# print(data.head())

data.insert(0, "one", 1)  # add the bias column after normalization, so it stays all ones


# 4. Build the data set by slicing
X = data.iloc[:, 0:-1]
Y = data.iloc[:, -1]
X = np.array(X)
Y = np.array(Y)
Y = Y.reshape(-1, 1)  # column vector, shape (47, 1)
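Note that price was normalized together with the features, so predictions from this model come out in normalized units. To map a prediction back to a real price, invert the transform with the raw training statistics (a sketch; the file is reread because data was overwritten above, and p_norm is a hypothetical normalized prediction):

raw = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data2.txt", names=["size", "bedrooms", "price"])
price_mu, price_sigma = raw["price"].mean(), raw["price"].std()
# a normalized prediction p_norm maps back as: p_norm * price_sigma + price_mu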

alphas = [0.0003,0.003,0.03]
iters = 1000


theta = np.zeros((3,1))
cost_init = costFunction(X,Y,theta)


fig, ax = plt.subplots()
for alpha in alphas:
    cost, g = gradientDescent(X, Y, theta, alpha, iters)
    ax.plot(np.arange(iters), cost, label=alpha)
ax.legend()
ax.set(xlabel="iters", ylabel="cost", title="cost vs iters")
plt.show()
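The plot shows that, within the stable range, a larger alpha drives the cost down in fewer iterations. To stop automatically instead of always running a fixed number of steps, a tolerance-based variant is one option (a sketch; gradientDescent_tol and tol are names I introduce):

def gradientDescent_tol(X, Y, theta, alpha, max_iters, tol=1e-7):
    costs = [costFunction(X, Y, theta)]
    for i in range(max_iters):
        theta = theta - alpha * X.T @ (X @ theta - Y) / len(X)
        costs.append(costFunction(X, Y, theta))
        if abs(costs[-2] - costs[-1]) < tol:  # stop once the cost barely changes
            break
    return costs, theta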

"""
#3.数据可视化
data.plot.scatter("size","price",label="size")
plt.show()

data.plot.scatter("bedrooms","price",label="bedrooms")
plt.show()
"""

Normal equation method

import numpy as np
#import matplotlib.pyplot as plt
import pandas as pd

def normal_Equation(X, Y):
    # closed-form solution: theta = (X^T X)^(-1) X^T Y
    theta = np.linalg.inv(X.T @ X) @ X.T @ Y
    return theta
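np.linalg.inv raises an error when X.T @ X is singular (e.g. linearly dependent features, or more features than samples). A more robust variant (my alternative, not from the original) uses the pseudo-inverse:

def normal_Equation_pinv(X, Y):
    return np.linalg.pinv(X.T @ X) @ X.T @ Y  # well-defined even when X.T @ X is singular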

data = pd.read_csv(r"D:\桌面\Coursera-ML-AndrewNg-Notes-master\code\ex1-linear regression\ex1data1.txt", names=["population", "profit"])
# print(data.head())  # inspect the first five rows
data.insert(0, 'ones', 1)
# print(data.head())
X = data.iloc[:, 0:-1]
# print(X.head())
Y = data.iloc[:, -1]
X = np.array(X)  # convert the DataFrame to a NumPy array
Y = np.array(Y)
Y = Y.reshape(-1, 1)

theta = normal_Equation(X,Y)
print(theta)
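Since the normal equation solves the problem exactly, the cost at this theta is the minimum that gradient descent can only approach; printing it gives a reference point for the earlier runs (my addition):

print(np.sum((X @ theta - Y) ** 2) / (2 * len(X)))  # minimum achievable cost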
