import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os
if __name__ == "__main__":
    # Linear regression on Shanghai second-hand housing prices,
    # trained with full-batch gradient descent on min-max-scaled data.
    all_data = pd.read_csv(os.path.join("..", "data", "上海二手房价.csv"))

    # Target: price per square meter, as an (n, 1) column.
    prices = all_data["房价(元/平米)"].values.reshape(-1, 1)
    pri_scaler = MinMaxScaler()
    st_prices = pri_scaler.fit_transform(prices)

    # Feature 1: floor number, reshaped to match the (n, 1) target.
    floors = all_data["楼层"].values.reshape(*prices.shape)
    flo_scaler = MinMaxScaler()
    st_floors = flo_scaler.fit_transform(floors)

    # Feature 2: construction year.
    years = all_data["建成年份"].values.reshape(-1, 1)
    yer_scaler = MinMaxScaler()
    st_years = yer_scaler.fit_transform(years)

    # Feature 3: area in square meters.
    square = all_data["面积(平米)"].values.reshape(*prices.shape)
    squ_scaler = MinMaxScaler()
    # BUG FIX: the original fitted this scaler on `floors` but then
    # transformed `square` with it, producing wrongly scaled areas.
    # The scaler must be fitted on the data it transforms.
    st_squ = squ_scaler.fit_transform(square)

    # Stack the three (n, 1) columns into (n, 1, 3), then squeeze the
    # middle axis away to get the (n, 3) design matrix.
    features = np.stack((st_floors, st_years, st_squ), axis=-1).squeeze(axis=1)

    # Weight vector: one coefficient per feature, shape (3, 1).
    k = np.random.normal(0, 0.5, size=(features.shape[-1], 1))

    epoch = 100
    lr = 0.01
    for e in range(epoch):
        pre = features @ k  # predictions, shape (n, 1)
        # Gradient of the squared error w.r.t. the predictions; dividing
        # by the sample count keeps the step size independent of n.
        G = (pre - st_prices) / pre.shape[0]
        loss = np.mean((pre - st_prices) ** 2)
        # Matrix form of the per-feature gradient: shape (3, 1).
        delta_k = features.T @ G
        k = k - delta_k * lr
        print(loss)
参考资料:
1. 《对sklearn中transform()和fit_transform()的深入理解》(CSDN博客) —— 关于 sklearn 中 transform() 与 fit_transform() 的区别。
2. 《Python的numpy中axis=0、axis=1、axis=2解释》(CSDN博客) —— 关于 axis 参数的解释,其中包括最大括号匹配法。
3. 《np.stack(array, axis) 官方对axis参数的解释——解决所有axis参数问题》(CSDN博客) —— stack 的笔记,对应拼接数据时遇到的维度问题:拼接得到 (270, 1, 3) 后,用 squeeze 将中间多余的维度去掉。
'''
逻辑回归--多层求导
import numpy as np
def sigmoid(x):
return 1/(1+np.exp(-x))
if __name__ == "__main__":
    # Logistic regression separating dogs (label 0) from cats (label 1),
    # trained with full-batch gradient descent on binary cross-entropy.
    np.random.seed(100)
    # Feature 1: leg length, feature 2: fur length.
    dogs = np.array([[8.9, 12], [9, 11], [10, 13], [9.9, 11.2],
                     [12.2, 10.1], [9.8, 13], [8.8, 11.2]], dtype=np.float32)  # label 0
    cats = np.array([[3, 4], [5, 6], [3.5, 5.5], [4.5, 5.1],
                     [3.4, 4.1], [4.1, 5.2], [4.4, 4.4]], dtype=np.float32)    # label 1
    labels = np.array([0] * 7 + [1] * 7, np.int32).reshape(-1, 1)
    # vstack concatenates the two (7, 2) arrays row-wise into (14, 2).
    X = np.vstack((dogs, cats))
    n = X.shape[0]  # 14 samples in total

    b = 0
    w = np.random.normal(0, 1, size=(2, 1))  # mean 0, std 1
    lr = 0.07
    epoch = 1000
    for e in range(epoch):
        # Raw logits span the whole real line; sigmoid maps them to a
        # probability in (0, 1) for the cross-entropy loss.
        pre = X @ w + b
        pre_sigmoid = sigmoid(pre)
        # Clamp away from exactly 0/1 so log() never receives 0
        # (the original could produce log(0) = -inf on extreme logits).
        pre_sigmoid = np.clip(pre_sigmoid, 1e-10, 1 - 1e-10)
        # BUG FIX: the mean must divide by all 14 samples, not 7.
        loss = -np.sum(labels * np.log(pre_sigmoid)
                       + (1 - labels) * np.log(1 - pre_sigmoid)) / n
        # Gradient of BCE composed with sigmoid w.r.t. the logits.
        G = (pre_sigmoid - labels) / n
        delta_w = X.T @ G
        delta_b = np.sum(G)  # G is already averaged, so just sum here
        w = w - delta_w * lr
        # BUG FIX: the bias update was missing the learning rate.
        b = b - delta_b * lr
        print(loss)
    while True:
        f1 = float(input("请输入第一个特征:"))
        f2 = float(input("请输入第二个特征:"))
        p = np.array([f1, f2]).reshape(1, -1) @ w + b
        p = sigmoid(p)
        # Cats were labeled 1, so probability > 0.5 means "cat".
        if p > 0.5:
            print("猫")
        else:
            print("狗")
写两个中间层
import numpy as np
def sigmoid(x):
    """Numerically-safe logistic sigmoid.

    Maps x into (0, 1), then clips the result to [1e-10, 0.999999] so
    that log(p) and log(1 - p) in the cross-entropy loss never hit
    log(0).
    """
    return np.clip(1 / (1 + np.exp(-x)), 1e-10, 0.999999)
if __name__ == "__main__":
    # Same cat/dog classifier, but with one hidden layer of 50 sigmoid
    # units before the sigmoid output, trained by backpropagation.
    np.random.seed(100)
    # Feature 1: leg length, feature 2: fur length.
    dogs = np.array([[8.9, 12], [9, 11], [10, 13], [9.9, 11.2],
                     [12.2, 10.1], [9.8, 13], [8.8, 11.2]], dtype=np.float32)  # label 0
    cats = np.array([[3, 4], [5, 6], [3.5, 5.5], [4.5, 5.1],
                     [3.4, 4.1], [4.1, 5.2], [4.4, 4.4]], dtype=np.float32)    # label 1
    labels = np.array([0] * 7 + [1] * 7, np.int32).reshape(-1, 1)
    X = np.vstack((dogs, cats))
    n = X.shape[0]  # 14 samples in total

    b1 = 0
    b2 = 0
    w1 = np.random.normal(0, 1, size=(2, 50))   # input -> hidden
    w2 = np.random.normal(0, 1, size=(50, 1))   # hidden -> output
    lr = 0.01
    epoch = 1000
    for e in range(epoch):
        # Forward pass.
        hidden = X @ w1 + b1
        hidden_sig = sigmoid(hidden)
        pre = hidden_sig @ w2 + b2
        pre_sigmoid = sigmoid(pre)  # probability in (0, 1)
        # BUG FIX: average over all 14 samples, not 7.
        loss = -np.sum(labels * np.log(pre_sigmoid)
                       + (1 - labels) * np.log(1 - pre_sigmoid)) / n
        # Gradient of BCE composed with sigmoid w.r.t. the output logits.
        G = (pre_sigmoid - labels) / n
        delta_b2 = np.sum(G)
        # BUG FIX: dL/dw2 = hidden_sig.T @ G, shape (50, 1). The original
        # assigned `hidden.T` (shape (50, 14)), which is not a gradient at
        # all and silently broadcast w2 into shape (50, 14).
        delta_w2 = hidden_sig.T @ G
        # BUG FIX: backprop through the hidden sigmoid requires its
        # derivative s * (1 - s); the original omitted it.
        delta_h = (G @ w2.T) * hidden_sig * (1 - hidden_sig)
        delta_w1 = X.T @ delta_h
        delta_b1 = np.sum(delta_h)
        w1 = w1 - delta_w1 * lr
        w2 = w2 - delta_w2 * lr
        # BUG FIX: bias updates were missing the learning rate.
        b1 = b1 - delta_b1 * lr
        b2 = b2 - delta_b2 * lr
        print(loss)
    while True:
        f1 = float(input("请输入第一个特征:"))
        f2 = float(input("请输入第二个特征:"))
        # BUG FIX: inference must apply the hidden-layer sigmoid exactly
        # as training does; the original skipped it.
        h = sigmoid(np.array([f1, f2]).reshape(1, -1) @ w1 + b1)
        p = sigmoid(h @ w2 + b2)
        # Cats were labeled 1, so probability > 0.5 means "cat".
        if p > 0.5:
            print("猫")
        else:
            print("狗")