import random
import numpy as np
# Note: load_boston was removed in scikit-learn 1.2; this requires scikit-learn < 1.2.
from sklearn.datasets import load_boston

dataset = load_boston()
dir(dataset)  # inspect the Bunch attributes: data, target, feature_names, DESCR, ...
import pandas as pd
dataframe = pd.DataFrame(dataset['data'], columns=dataset['feature_names'])
dataframe
dataframe['price'] = dataset['target']
dataframe
important_features_data = dataframe[['CRIM', 'RM', 'AGE']]  # columns 0, 5, 6
important_features_data
features_to_price = dict()
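# A minimal sketch of what features_to_price could hold, assuming the intent is
# to map each feature name to its Pearson correlation with price (an assumption;
# the dict is left unfilled in the original):
for column in dataset['feature_names']:
    features_to_price[column] = dataframe[column].corr(dataframe['price'])
features_to_price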
import seaborn as sns
import matplotlib.pyplot as plt
plt.scatter(dataframe['RM'], dataframe['price'])     # rooms vs. price: positive trend
plt.scatter(dataframe['LSTAT'], dataframe['price'])  # % lower-status population vs. price: negative trend
# (0) Simple linear regression (supervised learning)
from scipy import stats

a, b, r, p, std_err = stats.linregress(dataframe['RM'], dataframe['price'])

def myfunc(x):
    return a * x + b

mymodel = list(map(myfunc, dataframe['RM']))
plt.scatter(dataframe['RM'], dataframe['price'])
plt.plot(dataframe['RM'], mymodel)
plt.show()  # visualize the fitted line
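# linregress also returns the correlation coefficient r, so the goodness of fit
# can be reported directly (a small added check, not in the original):
print("r = {:.3f}, r^2 = {:.3f}".format(r, r ** 2))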
# (1) Random simulation / Monte Carlo search
X_rm = dataframe['RM']
Y = dataframe['price']

def loss(k, b):  # mean squared error loss
    return np.mean((Y - (k * X_rm + b)) ** 2)

min_loss = float('inf')
best_k, best_b = None, None
for step in range(1000):
    k, b = random.randint(-100, 100), random.randint(-100, 100)
    current_loss = loss(k, b)
    if current_loss < min_loss:
        min_loss = current_loss
        print("Step {}: found a better loss: {}, with k: {}, b: {}".format(step, min_loss, k, b))
        best_k, best_b = k, b
plt.scatter(X_rm, Y)
plt.plot(X_rm, best_k * X_rm + best_b)  # visualize the best randomly found line
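# For reference, a closed-form least-squares fit via np.polyfit shows how far
# the random search lands from the optimum (an added sketch for comparison):
fit_k, fit_b = np.polyfit(X_rm, Y, 1)
print("random search: k={}, b={}; least squares: k={:.3f}, b={:.3f}".format(best_k, best_b, fit_k, fit_b))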
# (2) Gradient descent
X_rm = dataframe['RM']
Y = dataframe['price']

def loss(k, b):  # mean squared error loss
    return np.mean((Y - (k * X_rm + b)) ** 2)

def partial_k(k, b, X, y):  # partial derivative of the loss with respect to k
    return 2 * np.mean((y - (k * X + b)) * (-X))

def partial_b(k, b, X, y):  # partial derivative of the loss with respect to b
    return 2 * np.mean((y - (k * X + b)) * (-1))
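# A quick sanity check of the analytic gradient against a finite difference
# (an added sketch; eps and the test point (k0, b0) are arbitrary choices):
eps = 1e-6
k0, b0 = 1.0, 1.0
numeric_k = (loss(k0 + eps, b0) - loss(k0 - eps, b0)) / (2 * eps)
print("analytic partial_k:", partial_k(k0, b0, X_rm, Y), "numeric:", numeric_k)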
min_loss = float('inf')
k, b = random.uniform(-50, 50), random.uniform(-50, 50)  # random init only once, then optimize from there
best_k, best_b = None, None
learning_rate = 1e-02
k_history = []
b_history = []
for step in range(1000):
    # Gradient descent step: evaluate both gradients at the current (k, b)
    # before updating, so the update of b does not use the already-updated k.
    grad_k = partial_k(k, b, X_rm, Y)
    grad_b = partial_b(k, b, X_rm, Y)
    k = k - grad_k * learning_rate
    b = b - grad_b * learning_rate
    current_loss = loss(k, b)
    if current_loss < min_loss:
        min_loss = current_loss
        print("Step {}: found a better loss: {}, with k: {}, b: {}".format(step, min_loss, k, b))
        best_k, best_b = k, b
    k_history.append(k)
    b_history.append(b)
plt.scatter(k_history, b_history)  # visualize the (k, b) trajectory during descent
plt.show()
plt.scatter(X_rm, Y)
plt.plot(X_rm, best_k * X_rm + best_b)  # visualize the line found by gradient descent
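# Assuming the cells above ran in order, the gradient-descent solution can be
# compared with the closed-form least-squares fit computed earlier (an added check):
print("gradient descent: k={:.3f}, b={:.3f}".format(best_k, best_b))
print("least squares:    k={:.3f}, b={:.3f}".format(fit_k, fit_b))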