In [3]:
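# Example: a person with 5 years of work experience, in the IT industry, employed at Baidu, at job level T6,
# can be represented by the feature vector (5, IT, Baidu, T6).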
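# Key concept 1: error. For a single sample, e = (labeled_y - predicted_y)**2 / 2; the total error is E = sum(e) over all samples.
# Key concept 2: optimization. Training the model means finding suitable weights.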
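# Key concept 3: gradient descent. The gradient is a vector pointing in the direction in which the function value
# rises fastest, so the direction opposite to the gradient is the one in which the function value falls fastest.
# If we repeatedly adjust the parameters by moving against the gradient, we arrive near the function's minimum.
# We only get near the minimum rather than exactly onto it because each step is never perfectly sized,
# and the last iteration may overshoot the minimum.
# Choosing the step size is a craft: if it is too small, many iterations are needed to get near the minimum;
# if it is too large, we may overshoot the minimum by a wide margin and fail to converge to a good point.
# Stochastic Gradient Descent (SGD) algorithm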
from functools import reduce
from DL import Perceptron #继承 perception已经定义的class
def f(x):
    """Identity function: return ``x`` unchanged.

    Defined with ``def`` rather than a name-bound lambda so that it has a real
    ``__name__`` in tracebacks and can carry this docstring.
    """
    # NOTE(review): presumably used as the activation function of LinearUnit;
    # the class body is not visible here -- confirm.
    return x
class LinearUnit(Perceptron):
def __init__(self,input_num):
In [4]:
def get_training_dataset():
    """Build the small hard-coded training set.

    Returns:
        tuple: ``(input_vecs, labels)``. ``input_vecs`` is a list of
        one-element feature vectors (years of work experience) and ``labels``
        is the list of expected outputs (monthly salary). The two lists are
        index-aligned: ``labels[i]`` belongs to ``input_vecs[i]``.
    """
    # Input vectors: each entry holds a single feature, the years of work experience.
    input_vecs = [[5], [3], [8], [1.4], [10.1]]
    # Expected outputs (monthly salary); must stay in one-to-one order with input_vecs.
    labels = [5500, 2300, 7600, 1800, 11400]
    return input_vecs, labels
def train_linear_unit(iterations=10, rate=0.01):
    """Create a one-feature ``LinearUnit`` and train it on the built-in dataset.

    Args:
        iterations: Number of training rounds passed to ``LinearUnit.train``.
            Defaults to 10, the value that was previously hard-coded.
        rate: Learning rate passed to ``LinearUnit.train``. Defaults to 0.01,
            the value that was previously hard-coded.

    Returns:
        The ``LinearUnit`` instance after ``train`` has been called on it.
    """
    # Create the linear unit; it takes a single input feature (years of work experience).
    lu = LinearUnit(1)
    input_vecs, labels = get_training_dataset()
    # Positional order (inputs, labels, iterations, rate) matches the original call.
    lu.train(input_vecs, labels, iterations, rate)
    return lu
# Script entry point: train the linear unit, then print predictions for a few inputs.
if __name__ == '__main__':
    linear_unit = train_linear_unit()
    # NOTE(review): the original comment here read "print the trained weights", but no
    # print statement follows it in this cell. Confirm whether the weights/bias are
    # already printed elsewhere (e.g. during training) before adding one here.
    # Spot-check: predicted monthly salary for several values of years worked.
    print('Work 3.4 years, monthly salary = %.2f' % linear_unit.predict([3.4]))
    print('Work 15 years, monthly salary = %.2f' % linear_unit.predict([15]))
    print('Work 1.5 years, monthly salary = %.2f' % linear_unit.predict([1.5]))
    print('Work 6.3 years, monthly salary = %.2f' % linear_unit.predict([6.3]))
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
1 iteration done!
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
2 iteration done!
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
3 iteration done!
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
4 iteration done!
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
5 iteration done!
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
6 iteration done!
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
7 iteration done!
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
8 iteration done!
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
9 iteration done!
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
intermediate weights:
10 iteration done!
weights :[1124.0634970262222]
bias :85.485289
Work 3.4 years, monthly salary = 3907.30
Work 15 years, monthly salary = 16946.44
Work 1.5 years, monthly salary = 1771.58
Work 6.3 years, monthly salary = 7167.09
In [11]:
# NOTE(review): the commented-out line below would bind the top-level `matplotlib`
# package to the name `plt`; the conventional alias is `import matplotlib.pyplot as plt`.
### import matplotlib as plt
# `%pylab inline` populates the interactive namespace from numpy and matplotlib
# (see the message it prints), so names from both become available unqualified.
# NOTE(review): explicit imports plus `%matplotlib inline` would be clearer, but
# later cells may rely on the names injected here -- check before changing.
%pylab inline
Populating the interactive namespace from numpy and matplotlib