import torch
import numpy as np
# Two-layer neural network implemented with NumPy
# Train a two-layer fully connected network (linear -> ReLU -> linear) on
# random data with plain NumPy, using hand-written gradients and vanilla
# gradient descent on a sum-of-squares loss.

# N: number of samples, D_in: input width, H: hidden width, D_out: output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and random regression targets.
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialised weights for the two linear layers (no bias terms).
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(100):
    # Forward pass: hidden pre-activation, ReLU, then output projection.
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Sum of squared errors over every sample and output dimension.
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backward pass: apply the chain rule from the loss back to each weight.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    # ReLU passes gradient only where its input was non-negative; copy first
    # so grad_h_relu itself is left unmodified.
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # In-place gradient-descent update of both weight matrices.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
0 30347035.384433888
1 24718568.653240222
2 23597788.63894424
3 23096459.851170607
4 21029319.754320197
5 16968220.82334677
6 12006552.7923388
7 7665316.670990429
8 4635940.666159237
9 2805222.211301748
10 1770659.9903594556
11 1193155.2029025808
12 861150.3712543799
13 659489.3501920444
14 528196.5922209043
15 436497.0161011358
16 368430.5840908838
17 315511.6611286531
18 272954.117259877
19 237933.08903132967
20 208598.70601409825
21 183750.43490385017
22 162566.31829082302
23 144334.967152506
24 128542.32416747476
25 114802.88316409125
26 102805.98169063582
27 92279.9275370203
28 83010.09700840812
29 74824.2279950018
30 67582.70144117868
31 61152.19873813349
32 55421.62923632564
33 50303.05854643542
34 45725.902359306834
35 41619.659155106914
36 37927.95084726674
37 34605.97425757133
38 31611.648090010305
39 28905.456200955367
40 26455.884397423237
41 24236.857316136233
42 22223.643186388297
43 20394.08862393687
44 18730.045056513158
45 17214.080157408902
46 15832.055550383366
47 14571.038324127541
48 13418.386602541708
49 12365.538760812191
50 11403.50309077913
51 10522.160463743843
52 9714.33894789827
53 8972.911901268339
54 8292.43265949931
55 7667.840191008367
56 7093.634143361322
57 6565.278272220198
58 6078.726836276828
59 5630.501209933924
60 5217.494767908937
61 4836.555157077071
62 4485.152986181674
63 4160.7489051267285
64 3860.96225583552
65 3584.1074455714843
66 3328.3717047603645
67 3091.9034212241377
68 2873.0359176828133
69 2670.4789754930616
70 2482.89088282988
71 2309.2004426825206
72 2148.2041159985074
73 1998.9290527010658
74 1860.4678090436898
75 1731.9929371075132
76 1612.8280894162003
77 1502.1449234975764
78 1399.4309631926496
79 1304.0625960549223
80 1215.369920797412
81 1132.9729313715118
82 1056.455426863299
83 985.2916366640375
84 919.059425165188
85 857.4425263852534
86 800.1558009341131
87 746.7987866063822
88 697.1188566016917
89 650.84884613353
90 607.7756551572991
91 567.6300088867403
92 530.276023968143
93 495.50123290163606
94 463.06217142632556
95 432.8126495265047
96 404.593327991592
97 378.27784385721804
98 353.71425819348053
99 330.7921299156915
# Two-layer neural network implemented with PyTorch
# - For 2-D operands, NumPy's dot() is equivalent to torch's mm(): matrix multiplication
# Sizes used for the tensor shapes below (same values as the NumPy section):
# x is (N, D_in), y is (N, D_out), w1 is (D_in, H), w2 is (H, D_out).
N,D_in,H,D_out = 64,1000,100,10
# Input and target tensors filled with samples from a standard normal.
x = torch.randn(N,D_in)
y = torch.randn(N,D_out)
# Randomly initialised weight matrices for the two layers.
w1 = torch.randn(D_in,H)
w2 = torch.randn(H,D_out)
# NOTE(review): presumably the gradient-descent step size for the training
# loop that follows — the loop is not fully visible here; confirm.
learning_rate = 1e-6
for t in range