机器学习中线性回归的几种方法

最小二乘法

(w, b) = (X^{T}X)^{-1}X^{T}y

def zuixiaoercheng_juzhen(datasets_X, datasets_Y):
    """Ordinary least squares via the normal equation.

    Fits y ≈ w*x + b and returns the pair (w, b).

    Args:
        datasets_X: 1-D sequence of inputs, length n.
        datasets_Y: 1-D sequence of targets, length n.

    Returns:
        (w, b): slope and intercept solving (X^T X)^-1 X^T y.
    """
    x = np.asarray(datasets_X, dtype=float)
    y = np.asarray(datasets_Y, dtype=float).reshape(-1, 1)  # n x 1 column
    # Design matrix [x | 1], shape n x 2. The original used the deprecated
    # np.mat/np.row_stack and read a module-level `length` global; deriving
    # the length from the argument makes the function self-contained.
    X = np.column_stack((x, np.ones(len(x))))
    # Normal equation: w = (X^T X)^-1 X^T y.
    w = np.linalg.inv(X.T @ X) @ X.T @ y
    return w[0, 0], w[1, 0]

sklearn中的linear_model

def sklearn_linearmodel(datasets_X, datasets_Y):
    """Fit a simple linear regression with scikit-learn.

    Args:
        datasets_X: 1-D sequence of inputs.
        datasets_Y: 1-D sequence of targets.

    Returns:
        (coef_, intercept_): fitted slope array and intercept.
    """
    # reshape(-1, 1) infers the sample count from the data itself,
    # instead of relying on the module-level `length` global.
    X = np.asarray(datasets_X).reshape(-1, 1)
    y = np.asarray(datasets_Y)
    linear = linear_model.LinearRegression()
    linear.fit(X, y)
    # linear.predict(...) can be used directly for new inputs.
    return linear.coef_, linear.intercept_

最小二乘法和sklearn最终代码

import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model


datasets_X = []  # house area (feature)
datasets_Y = []  # house price (target)

# Read "area,price" integer pairs, one per line.
# A context manager guarantees the handle is closed even on error
# (the original `open` was never closed).
with open('../datasets/prices.txt', 'r') as fr:
    for line in fr:
        items = line.strip().split(',')  # -> [area, price]
        datasets_X.append(int(items[0]))
        datasets_Y.append(int(items[1]))

length = len(datasets_X)

datasets_X = np.array(datasets_X)
datasets_Y = np.array(datasets_Y)

def zuixiaoercheng_juzhen(datasets_X, datasets_Y):
    """Ordinary least squares via the normal equation.

    Fits y ≈ w*x + b and returns the pair (w, b).

    Args:
        datasets_X: 1-D sequence of inputs, length n.
        datasets_Y: 1-D sequence of targets, length n.

    Returns:
        (w, b): slope and intercept solving (X^T X)^-1 X^T y.
    """
    x = np.asarray(datasets_X, dtype=float)
    y = np.asarray(datasets_Y, dtype=float).reshape(-1, 1)  # n x 1 column
    # Design matrix [x | 1], shape n x 2. The original used the deprecated
    # np.mat/np.row_stack and read a module-level `length` global; deriving
    # the length from the argument makes the function self-contained.
    X = np.column_stack((x, np.ones(len(x))))
    # Normal equation: w = (X^T X)^-1 X^T y.
    w = np.linalg.inv(X.T @ X) @ X.T @ y
    return w[0, 0], w[1, 0]


def sklearn_linearmodel(datasets_X, datasets_Y):
    """Fit a simple linear regression with scikit-learn.

    Args:
        datasets_X: 1-D sequence of inputs.
        datasets_Y: 1-D sequence of targets.

    Returns:
        (coef_, intercept_): fitted slope array and intercept.
    """
    # reshape(-1, 1) infers the sample count from the data itself,
    # instead of relying on the module-level `length` global.
    X = np.asarray(datasets_X).reshape(-1, 1)
    y = np.asarray(datasets_Y)
    linear = linear_model.LinearRegression()
    linear.fit(X, y)
    # linear.predict(...) can be used directly for new inputs.
    return linear.coef_, linear.intercept_



# Column-vector grid spanning the observed x-range, for drawing the lines.
minX = min(datasets_X)
maxX = max(datasets_X)
X = np.arange(minX, maxX).reshape([-1, 1])

w1, b1 = zuixiaoercheng_juzhen(datasets_X, datasets_Y)
w2, b2 = sklearn_linearmodel(datasets_X, datasets_Y)
print(w1)
print(b2)

plt.scatter(datasets_X, datasets_Y, color='red')
plt.xlabel('Area')
plt.ylabel('Price')
# NOTE: removed `plt.plot(X, w*X+b, color='blue')` — `w` and `b` were never
# defined in this script and the line raised a NameError at runtime.
plt.plot(X, w2 * X + b2, color='yellow')  # sklearn fit
plt.plot(X, w1 * X + b1, color='purple')  # normal-equation fit
plt.show()

结果展示


梯度下降方法

第一种梯度下降方法

import numpy as np
import matplotlib.pyplot as plt
import random

datasets_X = []  # x values (feature, e.g. area)
datasets_Y = []  # y values (target, e.g. price)

# Read "x,y" float pairs, one per line.
# A context manager guarantees the handle is closed even on error
# (the original `open` was never closed).
with open('../datasets/prices1.txt', 'r') as fr:
    for line in fr:
        items = line.strip().split(',')  # -> [x, y]
        datasets_X.append(float(items[0]))
        datasets_Y.append(float(items[1]))


def grade_desc(x, y, current_w, current_b, lr):
    """One stochastic-gradient step for y ≈ w*x + b on a single sample.

    Args:
        x, y: one training sample.
        current_w, current_b: parameters before the step.
        lr: learning rate.

    Returns:
        (w, b) after the step.
    """
    # Residual of the current prediction for this sample.
    residual = current_w * x + current_b - y

    # Gradient of the squared error w.r.t. w is residual*x, w.r.t. b is residual.
    new_w = current_w - lr * (residual * x)
    new_b = current_b - lr * residual

    return new_w, new_b

def train(data_X, data_Y, current_w, current_b, lr):
    """Run one SGD pass (epoch) over the whole dataset.

    Applies `grade_desc` to each (x, y) sample in order and returns the
    resulting (w, b).
    """
    w, b = current_w, current_b
    for i, x in enumerate(data_X):
        w, b = grade_desc(x, data_Y[i], w, b, lr)
    return w, b

# SGD hyper-parameters.
epochs = 10
lr = 0.0001
initial_w = 0
initial_b = 0
w, b = initial_w, initial_b

# One full pass over the data per epoch.
for _epoch in range(epochs):
    w, b = train(datasets_X, datasets_Y, w, b, lr)

# Column-vector grid spanning the observed x-range, for the fitted line.
minX = min(datasets_X)
maxX = max(datasets_X)
X = np.arange(minX, maxX).reshape([-1, 1])


# Scatter the raw points and overlay the learned line.
plt.scatter(datasets_X, datasets_Y, color='red')
plt.xlabel('Area')
plt.ylabel('Price')
plt.plot(X, w * X + b, color='blue')

plt.show()

第二种梯度下降方法

import numpy as np
import matplotlib.pyplot as plt
import random


np.set_printoptions(suppress=True)  # print floats without scientific notation

datasets_X = []  # x values (feature, e.g. area)
datasets_Y = []  # y values (target, e.g. price)

# Read "x,y" float pairs, one per line.
# A context manager guarantees the handle is closed even on error
# (the original `open` was never closed).
with open('../datasets/prices1.txt', 'r') as fr:
    for line in fr:
        items = line.strip().split(',')  # -> [x, y]
        datasets_X.append(float(items[0]))
        datasets_Y.append(float(items[1]))



def grade_desc(data_X, data_Y, current_w, current_b, lr):
    """One batch-gradient-descent step for the model y ≈ w*x + b.

    Averages the squared-error gradient over the whole dataset and
    returns the updated (w, b) pair.

    Args:
        data_X, data_Y: full training set.
        current_w, current_b: parameters before the step.
        lr: learning rate.
    """
    m = len(data_X)
    acc_w = 0
    acc_b = 0
    for i, x in enumerate(data_X):
        # Residual of the prediction on sample i.
        err = current_w * x + current_b - data_Y[i]
        acc_w += err * x
        acc_b += err

    # d/dw of mean squared error = 2/M * sum(err * x); likewise for b.
    grad_w = 2 / m * acc_w
    grad_b = 2 / m * acc_b

    return current_w - lr * grad_w, current_b - lr * grad_b


def compute_cost(w, b, X, Y):
    """Mean squared error of the line y = w*x + b over the dataset.

    Args:
        w, b: model parameters.
        X, Y: full training set.

    Returns:
        Average of (y_i - w*x_i - b)^2 over all samples.
    """
    m = len(X)
    # Accumulate squared residuals point by point, then average.
    total = sum((Y[i] - w * X[i] - b) ** 2 for i in range(m))
    return total / m

# Batch-gradient-descent hyper-parameters.
epochs = 10
lr = 0.0001
initial_w = 0
initial_b = 0
w, b = initial_w, initial_b

# Each iteration performs one full-batch parameter update.
for _epoch in range(epochs):
    w, b = grade_desc(datasets_X, datasets_Y, w, b, lr)

# Report the fitted parameters and the final mean squared error.
cost = compute_cost(w, b, datasets_X, datasets_Y)
print(w)
print(b)
print(cost)



#
# Column-vector grid spanning the observed x-range, for the fitted line.
minX = min(datasets_X)
maxX = max(datasets_X)
X = np.arange(minX, maxX).reshape([-1, 1])

plt.scatter(datasets_X, datasets_Y, color='red')
plt.xlabel('Area')
plt.ylabel('Price')
plt.plot(X, w * X + b, color='blue')

plt.show()

 


 

prices.txt

1000,168
792,184
1260,197
1262,220
1240,228
1170,248
1230,305
1255,256
1194,240
1450,230
1481,202
1475,220
1482,232
1484,460
1512,320
1680,340
1620,240
1720,368
1800,280
4400,710
4212,552
3920,580
3212,585
3151,590
3100,560
2700,285
2612,292
2705,482
2570,462
2442,352
2387,440
2292,462
2308,325
2252,298
2202,352
2157,403
2140,308
4000,795
4200,765
3900,705
3544,420
2980,402
4355,762
3150,392

prices1.txt

32.502345269453031,31.70700584656992
53.426804033275019,68.77759598163891
61.530358025636438,62.562382297945803
47.475639634786098,71.546632233567777
59.813207869512318,87.230925133687393
55.142188413943821,78.211518270799232
52.211796692214001,79.64197304980874
39.299566694317065,59.171489321869508
48.10504169176825,75.331242297063056
52.550014442733818,71.300879886850353
45.419730144973755,55.165677145959123
54.351634881228918,82.478846757497919
44.164049496773352,62.008923245725825
58.16847071685779,75.392870425994957
56.727208057096611,81.43619215887864
48.955888566093719,60.723602440673965
44.687196231480904,82.892503731453715
60.297326851333466,97.379896862166078
45.618643772955828,48.847153317355072
38.816817537445637,56.877213186268506
66.189816606752601,83.878564664602763
65.41605174513407,118.59121730252249
47.48120860786787,57.251819462268969
41.57564261748702,51.391744079832307
51.84518690563943,75.380651665312357
59.370822011089523,74.765564032151374
57.31000343834809,95.455052922574737
63.615561251453308,95.229366017555307
46.737619407976972,79.052406169565586
50.556760148547767,83.432071421323712
52.223996085553047,63.358790317497878
35.567830047746632,41.412885303700563
42.436476944055642,76.617341280074044
58.16454011019286,96.769566426108199
57.504447615341789,74.084130116602523
45.440530725319981,66.588144414228594
61.89622268029126,77.768482417793024
33.093831736163963,50.719588912312084
36.436009511386871,62.124570818071781
37.675654860850742,60.810246649902211
44.555608383275356,52.682983366387781
43.318282631865721,58.569824717692867
50.073145632289034,82.905981485070512
43.870612645218372,61.424709804339123
62.997480747553091,115.24415280079529
32.669043763467187,45.570588823376085
40.166899008703702,54.084054796223612
53.575077531673656,87.994452758110413
33.864214971778239,52.725494375900425
64.707138666121296,93.576118692658241
38.119824026822805,80.166275447370964
44.502538064645101,65.101711570560326
40.599538384552318,65.562301260400375
41.720676356341293,65.280886920822823
51.088634678336796,73.434641546324301
55.078095904923202,71.13972785861894
41.377726534895203,79.102829683549857
62.494697427269791,86.520538440347153
49.203887540826003,84.742697807826218
41.102685187349664,59.358850248624933
41.182016105169822,61.684037524833627
50.186389494880601,69.847604158249183
52.378446219236217,86.098291205774103
50.135485486286122,59.108839267699643
33.644706006191782,69.89968164362763
39.557901222906828,44.862490711164398
56.130388816875467,85.498067778840223
57.362052133238237,95.536686846467219
60.269214393997906,70.251934419771587
35.678093889410732,52.721734964774988
31.588116998132829,50.392670135079896
53.66093226167304,63.642398775657753
46.682228649471917,72.247251068662365
43.107820219102464,57.812512976181402
70.34607561504933,104.25710158543822
44.492855880854073,86.642020318822006
57.50453330326841,91.486778000110135
36.930076609191808,55.231660886212836
55.805733357942742,79.550436678507609
38.954769073377065,44.847124242467601
56.901214702247074,80.207523139682763
56.868900661384046,83.14274979204346
34.33312470421609,55.723489260543914
59.04974121466681,77.634182511677864
57.788223993230673,99.051414841748269
54.282328705967409,79.120646274680027
51.088719898979143,69.588897851118475
50.282836348230731,69.510503311494389
44.211741752090113,73.687564318317285
38.005488008060688,61.366904537240131
32.940479942618296,67.170655768995118
53.691639571070056,85.668203145001542
68.76573426962166,114.85387123391394
46.230966498310252,90.123572069967423
68.319360818255362,97.919821035242848
50.030174340312143,81.536990783015028
49.239765342753763,72.111832469615663
50.039575939875988,85.232007342325673
48.149858891028863,66.224957888054632
25.128484647772304,53.454394214850524

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值