虽然Lasagne自带的examples中带有MNIST的例子,但是这个例子的风格更接近用Theano来写。使用nolearn.lasagne中的NeuralNet类来写MLP似乎更符合Lasagne的设计初衷,也更像Caffe的风格。
code
没有比代码更清楚的了:
# Using lasagne to fit mnist.
# show how to use NeuralNet
# Tarrega, 150611.
# stdlib
import gzip
import os
import pickle
import sys
from urllib import urlretrieve

# third-party
import lasagne
from lasagne import layers
from lasagne.updates import nesterov_momentum
import numpy
from nolearn.lasagne import NeuralNet
# URL of the pickled, gzipped MNIST dataset (a (train, valid, test) tuple).
DATA_URL = 'http://deeplearning.net/data/mnist/mnist.pkl.gz'
# Local cache filename for the downloaded archive.
DATA_FILENAME = 'mnist.pkl.gz'
def pickle_load(f, encoding):
    """Unpickle and return the object stored in file object *f*.

    Compatibility shim: on Python 3 the *encoding* argument is forwarded
    to ``pickle.load`` (required to decode the Python-2-era MNIST pickle,
    e.g. ``'latin-1'``); Python 2's ``pickle.load`` takes no such
    argument, so there it is ignored.
    """
    if sys.version_info[0] >= 3:
        return pickle.load(f, encoding=encoding)
    # Python 2: bytes/str are the same type, no decoding needed.
    return pickle.load(f)
def _load_data(url=DATA_URL, filename=DATA_FILENAME):
    """Load data from `url` and store the result in `filename`.

    Downloads the gzipped MNIST pickle on first use, then returns the
    unpickled object (the (train, valid, test) tuple stored in the file).
    """
    # print() with a single pre-formatted string prints identically on
    # Python 2 and 3 (the original mixed a py2 print statement with a
    # print() call and misspelled the message).
    print('filename for the MNIST dataset: %s' % filename)
    if not os.path.exists(filename):
        print("Downloading MNIST dataset")
        urlretrieve(url, filename)
    with gzip.open(filename, 'rb') as f:
        # latin-1 is needed to unpickle the Python-2 pickle on Python 3.
        return pickle_load(f, encoding='latin-1')
def load():
    """Get data with labels, split into training, validation and test set.

    Returns only the training split as ``(X_train, y_train)``; the
    validation and test splits from the pickle are currently unused.
    Labels are cast to int32 as expected by lasagne/nolearn losses.
    X_train is presumably shaped (n_samples, 784) — one flattened
    28x28 image per row (see input_shape below) — TODO confirm.
    """
    data = _load_data()
    X_train, y_train = data[0]
    y_train = numpy.asarray(y_train, dtype='int32')
    # X_valid, y_valid = data[1]
    # X_test, y_test = data[2]
    # Single %s-formatted string: same output under Python 2 and 3
    # (the original used a py2-only print statement).
    print('size: %s %s %s %s' % (X_train.shape, y_train.shape,
                                 X_train.dtype, y_train.dtype))
    return X_train, y_train
# Build and train the classifier: a single-hidden-layer MLP
# (784 -> 200 -> 10), declared Caffe-style via nolearn's NeuralNet —
# a list of named layer classes plus per-layer keyword parameters
# (prefix = layer name, e.g. hidden_num_units configures 'hidden').
net1 = NeuralNet(
    layers=[  # three layers: one hidden layer
        ('input', layers.InputLayer),
        ('hidden', layers.DenseLayer),
        ('output', layers.DenseLayer),
        ],
    # layer parameters:
    input_shape=(None, 28*28),  # 28x28 input pixels per batch
    hidden_num_units=200,  # number of units in hidden layer
    output_nonlinearity=lasagne.nonlinearities.softmax,  # output layer
    output_num_units=10,  # 10 target values
    # optimization method:
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    regression=False,  # classification, not regression: y holds class labels
    max_epochs=400,  # we want to train this many epochs
    verbose=1,
    )
X, y = load()
net1.fit(X, y)
output
运行结果:
epoch train loss valid loss train/val valid acc dur
------- ------------ ------------ ----------- ----------- -----
1 0.57989 0.32038 1.80998 0.91009 2.60s
2 0.30194 0.26267 1.14949 0.92692 2.64s
3 0.25044 0.22917 1.09281 0.93482 2.63s
4 0.21578 0.20542 1.05046 0.94105 2.83s
5 0.18969 0.18748 1.01177 0.94560 2.73s
6 0.16917 0.17336 0.97585 0.94876 2.73s
7 0.15259 0.16223 0.94059 0.95232 2.80s
8 0.13882 0.15313 0.90655 0.95509 2.79s
9 0.12723 0.14541 0.87498 0.95776 2.71s
10 0.11725 0.13897 0.84373 0.96043 2.65s
11 0.10859 0.13341 0.81392 0.96152 2.74s
12 0.10100 0.12864 0.78511 0.96271 2.65s
13 0.09424 0.12444 0.75735 0.96389 2.65s
14 0.08822 0.12075 0.73056 0.96508 2.75s
15 0.08280 0.11754 0.70446 0.96567 2.66s
16 0.07790 0.11458 0.67988 0.96647 2.55s
17 0.07343 0.11192 0.65606 0.96676 2.68s
18 0.06936 0.10953 0.63324 0.96785 2.69s
19 0.06561 0.10741 0.61082 0.96874 2.61s
20 0.06218 0.10538 0.59000 0.96914 2.60s
21 0.05899 0.10357 0.56959 0.96943 2.64s
22 0.05604 0.10197 0.54953 0.96973 2.59s
23 0.05329 0.10048 0.53033 0.97022 2.61s
24 0.05070 0.09921 0.51105 0.97052 2.71s
25 0.04829 0.09795 0.49296 0.97072 2.68s
Questions:
- 为什么运行速度比Lasagne自带的Example快这么多?NeuralNet类默认把训练数据按80%/20%划分为训练集和验证集(注意是验证集,不是测试集——对应输出中的valid loss/valid acc),每个epoch实际参与训练的样本更少,计算量因此减小。