http://blog.csdn.net/qingnianzhi/article/details/46912383#comments
先上效果图,然后贴代码。依赖库主要有opencv、Lasagne、nolearn
这幅图是训练用的数据库图片。数据库地址https://www.kaggle.com/c/facial-keypoints-detection 图片保存在csv文件里。
这幅图是训练之后检测的结果,效果还不错,不过对于侧脸的检测可以发现效果相对差一些,少部分图像会出现较大偏差。不过这个检测的数据库图片质量真是不怎么好。
- # coding:utf-8
- import os
- import cv2
- import numpy as np
- from pandas.io.parsers import read_csv
- from sklearn.utils import shuffle
- from lasagne import layers
- from lasagne.updates import nesterov_momentum
- from nolearn.lasagne import NeuralNet
- import theano.tensor as T
def load(fname='training.csv'):
    """Load the Kaggle facial-keypoints training set.

    Reads *fname* (a CSV whose last column 'Image' holds space-separated
    pixel values of a flattened 96x96 grayscale image), drops rows with
    missing keypoints, scales pixels to [0, 1] and keypoint coordinates
    to roughly [-1, 1], and shuffles deterministically.

    Returns:
        X: float32 array, one flattened 96x96 image per row.
        y: float32 array, 30 keypoint coordinates per row.
    """
    # BUG FIX: the original line was mangled by blog copy/paste HTML
    # ('<span ...>training.csv</span>') and lacked string quotes; restored
    # to a plain literal and exposed as a parameter for reuse.
    df = read_csv(os.path.expanduser(fname))  # load pandas dataframe
    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' '))
    print(df.count())  # prints the number of non-null values per column
    df = df.dropna()   # drop all rows that have missing keypoints
    X = np.vstack(df['Image'].values) / 255.  # scale pixel values to [0, 1]
    X = X.astype(np.float32)
    y = df[df.columns[:-1]].values            # every column except 'Image'
    y = (y - 48) / 48.                        # scale coordinates to [-1, 1]
    X, y = shuffle(X, y, random_state=42)     # deterministic shuffle
    y = y.astype(np.float32)
    return X, y
def load_testdata(fname='test.csv'):
    """Load the test images from *fname*.

    The CSV must have an 'Image' column of space-separated pixel values.
    Returns a float32 array with one flattened image per row, pixel
    values scaled to [0, 1]. (Filename generalized from the hard-coded
    'test.csv'; default keeps the old behavior.)
    """
    df = read_csv(os.path.expanduser(fname))  # load pandas dataframe
    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' '))
    X = np.vstack(df['Image'].values)
    X = X.astype(np.float32)
    X = X / 255.  # scale pixel values to [0, 1]
    return X
- '''''
- for i in range(len(X)):
- img = X[i]
- img = img.reshape(96,96)
- add = './test_data/'+str(i)+'.Bmp'
- cv2.imwrite(add,img)
- '''
# Baseline MLP: 9216 -> 100 -> 30 fully-connected net trained with
# Nesterov momentum; predicts 30 keypoint coordinates per face.
net1 = NeuralNet(
    layers=[  # three layers: one hidden layer
        ('input', layers.InputLayer),
        ('hidden', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],
    # layer parameters:
    input_shape=(None, 9216),   # 96x96 input pixels per batch
    hidden_num_units=100,       # number of units in hidden layer
    output_nonlinearity=None,   # output layer uses identity function
    output_num_units=30,        # 30 target values (15 x/y keypoint pairs)
    # optimization method:
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    regression=True,  # regression problem, not classification
    max_epochs=400,   # we want to train this many epochs
    verbose=1,
)
# Convolutional net: three conv/pool/dropout stages followed by two dense
# layers; input is a batch of single-channel 96x96 images, output is the
# same 30-value keypoint regression target as net1.
net2 = NeuralNet(
    layers=[
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('pool1', layers.MaxPool2DLayer),
        ('dropout1', layers.DropoutLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool2', layers.MaxPool2DLayer),
        ('dropout2', layers.DropoutLayer),
        ('conv3', layers.Conv2DLayer),
        ('pool3', layers.MaxPool2DLayer),
        ('dropout3', layers.DropoutLayer),
        ('hidden4', layers.DenseLayer),
        ('dropout4', layers.DropoutLayer),
        ('hidden5', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],
    input_shape=(None, 1, 96, 96),  # (batch, channels, height, width)
    conv1_num_filters=32, conv1_filter_size=(3, 3), pool1_pool_size=(2, 2),
    conv2_num_filters=64, conv2_filter_size=(3, 3), pool2_pool_size=(2, 2),
    conv3_num_filters=128, conv3_filter_size=(3, 3), pool3_pool_size=(2, 2),
    hidden4_num_units=500, hidden5_num_units=500,
    # Dropout rates grow with depth; heaviest before the final dense layers.
    dropout1_p=0.1, dropout2_p=0.2, dropout3_p=0.2, dropout4_p=0.5,
    output_num_units=30, output_nonlinearity=None,  # identity output
    update_learning_rate=0.01,
    update_momentum=0.9,
    regression=True,
    max_epochs=200,
    verbose=1,
)
# Deeper MLP variant: 9216 -> 300 (tanh) -> 100 -> 30 (tanh); same task
# as net1 but with two hidden layers and explicit nonlinearities.
net3 = NeuralNet(
    # three layers: one hidden layer
    layers=[
        ('input', layers.InputLayer),
        ('h1', layers.DenseLayer),
        ('h2', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],  # layer parameters: # variable batch size.
    input_shape=(None, 9216),  # 96x96 input pixels per batch
    h1_num_units = 300,        # number of units in hidden layer
    h1_nonlinearity = T.tanh,
    h2_num_units = 100,
    h2_nonlinearity = None,    # rectify,
    output_nonlinearity=T.tanh,
    # None if output layer wants to use identity function
    output_num_units=30,  # 30 target values # optimization method:
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9, regression = True,  # regression, not classification
    max_epochs = 1000,  # we want to train this many epochs
    eval_size = 0.1,    # hold out 10% of the data for validation
    verbose=1
)
def train_mlp():
    """Train the baseline MLP (net1) and checkpoint its weights.

    Resumes from 'face_weight_mlp' when a previous checkpoint exists.
    """
    x, y = load()
    # BUG FIX: the original unconditionally called load_params_from, which
    # crashes on the very first run when no weight file exists yet.
    if os.path.exists('face_weight_mlp'):
        net1.load_params_from('face_weight_mlp')
    net1.fit(x, y)
    net1.save_params_to('face_weight_mlp')
def test_mlp():
    """Predict keypoints with net1 and save annotated images to ./predict/."""
    net1.load_params_from('face_weight_mlp')
    x = load_testdata()
    # BUG FIX: dropped the dead `np.empty` pre-allocation that was
    # immediately overwritten by predict().
    y = net1.predict(x)
    # Undo the training-time scaling: pixels back to [0, 255],
    # keypoint coordinates back to pixel space around [0, 96].
    x = x * 255
    y = y * 48 + 48
    if not os.path.exists('./predict'):
        os.makedirs('./predict')  # cv2.imwrite fails silently without the dir
    for i in range(len(x)):
        tmp = x[i].reshape(96, 96)
        img = cv2.cvtColor(tmp, cv2.COLOR_GRAY2BGR)
        add = './predict/' + str(i) + '.Bmp'
        for j in range(15):
            # BUG FIX: cv2.circle requires integer coordinates; the raw
            # predictions are floats and are rejected by modern OpenCV.
            point = (int(y[i][2 * j]), int(y[i][2 * j + 1]))
            cv2.circle(img, point, 1, (0, 255, 0))
        cv2.imwrite(add, img)
def train_ann():
    """Train the two-hidden-layer ANN (net3) and checkpoint its weights."""
    x, y = load()
    # BUG FIX: the original loaded the checkpoint into net1 instead of
    # net3, so net3 always trained from scratch while net1's weights were
    # clobbered. Also guard the first run, when no checkpoint exists yet.
    if os.path.exists('face_weight_ann'):
        net3.load_params_from('face_weight_ann')
    net3.fit(x, y)
    net3.save_params_to('face_weight_ann')
def test_ann():
    """Predict keypoints with net3 and save annotated images to ./predict/."""
    net3.load_params_from('face_weight_ann')
    x = load_testdata()
    # BUG FIX: dropped the dead `np.empty` pre-allocation that was
    # immediately overwritten by predict().
    y = net3.predict(x)
    # Undo the training-time scaling: pixels back to [0, 255],
    # keypoint coordinates back to pixel space around [0, 96].
    x = x * 255
    y = y * 48 + 48
    if not os.path.exists('./predict'):
        os.makedirs('./predict')  # cv2.imwrite fails silently without the dir
    for i in range(len(x)):
        tmp = x[i].reshape(96, 96)
        img = cv2.cvtColor(tmp, cv2.COLOR_GRAY2BGR)
        add = './predict/' + str(i) + '.Bmp'
        for j in range(15):
            # BUG FIX: cv2.circle requires integer coordinates; the raw
            # predictions are floats and are rejected by modern OpenCV.
            point = (int(y[i][2 * j]), int(y[i][2 * j + 1]))
            cv2.circle(img, point, 1, (0, 255, 0))
        cv2.imwrite(add, img)
def train_cnn():
    """Train the convolutional net (net2) and checkpoint its weights."""
    x, y = load()
    x = x.reshape(-1, 1, 96, 96)  # flat pixels -> (batch, channel, h, w)
    # BUG FIX: the original unconditionally called load_params_from, which
    # crashes on the very first run when no weight file exists yet.
    if os.path.exists('face_weight_cnn'):
        net2.load_params_from('face_weight_cnn')
    net2.fit(x, y)
    net2.save_params_to('face_weight_cnn')
def test_cnn():
    """Predict keypoints with net2 and save annotated images to ./predict/."""
    net2.load_params_from('face_weight_cnn')
    x = load_testdata()
    x = x.reshape(-1, 1, 96, 96)  # flat pixels -> (batch, channel, h, w)
    # BUG FIX: dropped the dead `np.empty` pre-allocation that was
    # immediately overwritten by predict().
    y = net2.predict(x)
    # Undo the training-time scaling: pixels back to [0, 255],
    # keypoint coordinates back to pixel space around [0, 96].
    x = x * 255
    y = y * 48 + 48
    if not os.path.exists('./predict'):
        os.makedirs('./predict')  # cv2.imwrite fails silently without the dir
    for i in range(len(x)):
        tmp = x[i].reshape(96, 96)
        img = cv2.cvtColor(tmp, cv2.COLOR_GRAY2BGR)
        add = './predict/' + str(i) + '.Bmp'
        for j in range(15):
            # BUG FIX: cv2.circle requires integer coordinates; the raw
            # predictions are floats and are rejected by modern OpenCV.
            point = (int(y[i][2 * j]), int(y[i][2 * j + 1]))
            cv2.circle(img, point, 1, (0, 255, 0))
        cv2.imwrite(add, img)
if __name__ == '__main__':
    # Uncomment the stage you want to run:
    # train_ann()
    # test_ann()
    # train_mlp()
    # test_mlp()
    # train_cnn()
    test_cnn()
    # BUG FIX: `print 'OK'` is Python-2-only syntax; the rest of the file
    # already uses the print() call form, which works on both 2 and 3.
    print('OK')
后来没办法只好去用lasagne,谁知道程序出来之后 报错如下:
File "/usr/local/lib/python2.7/dist-packages/theano/gradient.py", line 432, in grad
raise TypeError("cost must be a scalar.")
TypeError: cost must be a scalar.
这里有解决方法https://github.com/dnouri/kfkd-tutorial/issues/16
主要就是这三句:
pip uninstall Lasagne
pip uninstall nolearn
pip install -r https://raw.githubusercontent.com/dnouri/kfkd-tutorial/master/requirements.txt
当时没有找到解决办法,百度不给力呀,然后就想自己解决,这个报错太坑了,专门跑去看了源码,没多大收获。最后想到的解决方法是自己去用theano写,主要要修改的地方如下:
这里的cost函数表示的是归类问题的趋近概率,而我们要的cost函数是表示预测值和真实值的差值,感觉用LMS( Least mean square)函数比较合适(学的少,只知道这个- -)。不过因为theano调试比较麻烦,python学的又菜,感觉调试时间太久,放弃了又回去找之前的问题。其实还是有点懒,用了这些库之后就不太想再用theano了。