作业1:预测PM2.5的值
在这个作业中,我们将用梯度下降方法预测PM2.5的值
- hw1要求:
1、要求python3.5+
2、只能用(1)numpy(2)scipy(3)pandas
3、请用梯度下降手写线性回归
4、成绩需超过公共简单基线(public simple baseline)
5、对于想加载模型而不想运行整个训练过程的人:
- hw_best要求:
1、要求python3.5+
2、任何库都可以用
3、在Kaggle上达到你自己选定的更高分数
读取数据:
import numpy as np
import scipy
import pandas as pd
# NOTE(review): presumably the gradient-descent learning rate (step size);
# it is not referenced by any code visible in this section - confirm against
# the training loop.
lr = 0.001
def get_train(path="train.csv"):
    """Build sliding-window PM2.5 training samples from a CSV file.

    Starting at line index 10, every 18th line of the file is used. On each
    of those lines the first three comma-separated fields are skipped and
    the remaining fields are treated as one consecutive series of readings.
    Every run of nine consecutive readings becomes one sample, and the
    reading immediately following that run becomes its label.

    NOTE(review): the 10/18 offsets presumably correspond to one header line
    followed by 18 measurement rows per day with PM2.5 as the tenth row -
    confirm against the actual dataset layout.

    Args:
        path: CSV file to read. Defaults to ``"train.csv"``.

    Returns:
        A tuple ``(train_data, train_label)`` of ``float64`` arrays. With at
        least one sample, ``train_data`` has one row of nine readings per
        sample and ``train_label`` holds the matching next reading.

    Raises:
        ValueError: if a selected field cannot be converted to float.
    """
    window = 9  # number of consecutive readings used as features

    with open(path) as f:
        rows = [line.strip().split(',') for line in f]

    train_data = []
    train_label = []
    for row in rows[10::18]:
        readings = row[3:]
        # The label is the reading right after the window (start + window),
        # so the last usable start index is len(readings) - window - 1.
        # Using start + window + 1 would silently skip one hour between the
        # features and the target.
        for start in range(len(readings) - window):
            train_data.append(readings[start:start + window])
            train_label.append(readings[start + window])

    return (np.array(train_data, dtype=np.float64),
            np.array(train_label, dtype=np.float64))
def get_test(test_path="test(1).csv", answer_path="answer.csv"):
    """Load PM2.5 test samples and their reference answers from CSV files.

    From the test file, every 18th line starting at line index 9 is used;
    the first two comma-separated fields are skipped and the rest form one
    sample. From the answer file, the first line is skipped and the second
    field of every remaining line is taken as a label.

    NOTE(review): the 9/18 offsets presumably correspond to a header-less
    file with 18 measurement rows per sample and PM2.5 as the tenth row -
    confirm against the actual dataset layout.

    Args:
        test_path: CSV file holding the test features. Defaults to
            ``"test(1).csv"``.
        answer_path: CSV file holding the expected values. Defaults to
            ``"answer.csv"``.

    Returns:
        A tuple ``(test_data, test_label)`` of ``float64`` arrays.

    Raises:
        ValueError: if a selected field cannot be converted to float.
        IndexError: if a line of the answer file after the first has fewer
            than two fields.
    """
    with open(test_path) as f:
        feature_rows = [line.strip().split(',') for line in f]
    test_data = [row[2:] for row in feature_rows[9::18]]

    with open(answer_path) as f:
        answer_rows = [line.strip().split(',') for line in f]
    # answer_rows[0] is skipped (treated as a header line).
    test_label = [row[1] for row in answer_rows[1:]]

    return (np.array(test_data, dtype=np.float64),
            np.array(test_label, dtype=np.float64))
import numpy as np
import math
import pandas as pd
from sklearn.preprocessing import StandardScaler
# Loss function
def lossfunc(x_train, y_train, theta):
    """Return the sum of squared residuals divided by ``len(y_train)``.

    Args:
        x_train: feature values; converted to a float array.
        y_train: target values; converted to a float array.
        theta: parameters multiplied with ``x_train`` via a dot product.

    Returns:
        ``sum((y_train - x_train . theta) ** 2) / len(y_train)``.
    """
    features = np.array(x_train, float)
    targets = np.array(y_train, float)
    residual = targets - np.dot(features, theta)
    squared_error = np.sum(np.square(residual))
    return squared_error / len(targets)
# Derivative of the loss function
def dlossfunc(x_train, y_train, theta):
    """Return the gradient of ``lossfunc`` with respect to ``theta``.

    Args:
        x_train: feature values; converted to a float array.
        y_train: target values; converted to a float array.
        theta: parameters at which the gradient is evaluated.

    Returns:
        ``x_train.T . (x_train . theta - y_train) * 2 / len(y_train)``.
    """
    features = np.array(x_train, float)
    targets = np.array(y_train, float)
    prediction_error = np.dot(features, theta) - targets
    projected = np.dot(features.T, prediction_error)
    return projected * 2. / len(targets)
# Standardization
def standardfunc(data):
    """Standardize each column to zero mean and unit standard deviation.

    Implemented with NumPy only, because the hw1 rules stated at the top of
    this file restrict the solution to numpy/scipy/pandas. The computation
    is intended to mirror scikit-learn's ``StandardScaler`` defaults: the
    population standard deviation (``ddof=0``) is used, NaNs are ignored
    when computing the statistics, and columns that are constant are only
    centered (their scale factor is 1) instead of being divided by zero.

    Args:
        data: array-like of numeric values, samples along the first axis.

    Returns:
        A new ``float64`` array of the same shape as ``data``.
    """
    values = np.asarray(data, dtype=np.float64)
    if values.size == 0:
        # Nothing to scale; avoid mean/std of an empty slice.
        return values.copy()

    mean = np.nanmean(values, axis=0)
    std = np.nanstd(values, axis=0)

    # A column whose spread is at rounding-noise level relative to its mean
    # is treated as constant; dividing by such a std would blow the noise up.
    tolerance = np.finfo(np.float64).eps * values.shape[0] * np.abs(mean)
    scale = np.where(std > tolerance, std, 1.0)

    return (values - mean) / scale
- 待补充: