前面有一个帖《OpenCV检测场景内是否有移动物体》我用树莓派做了一个简单的Motion Detection,放在卫生间的,它会在我上大号时自动播放音乐。
我一个人租房,几个盆友周末时常会找我玩,他们觉得我做的Motion Detection很垃圾。于是我就想恶搞一下,用TensorFlow做一个“人脸识别”,在我上大号时播放音乐,如果是别人就播放《张震讲鬼故事》(@xingCI说放屁声更搞)。
我的任务是训练一个模型，用来区分“我”和“其它人”的脸。注意，上面“人脸识别”我是加引号的，其实并不是真正的人脸识别，充其量就是个图像分类。如果你要使用真正的人脸识别，可以试试现成的库OpenFace+dlib《使用OpenFace进行人脸识别》。
有人已经把TensorFlow移植到了树莓派,项目地址tensorflow-on-raspberry-pi。
准备数据
本帖需要使用到两组数据:一组是包含我脸的图像,另一组包含其它人人脸的图像。
其它人人脸的收集
找一堆图片,只要不包含自己就行,然后使用OpenCV提取图像中的大脸。
我使用的数据集是前面几帖用到的:《妹子图》《大脸》《imagenet》
提取图像中的人脸,我使用OpenCV,据说使用dlib效果更好。
other_peoples_faces.py:
import cv2
import os
import sys

IMAGE_DIR = '图片目录路径'
OUTPUT_DIR = './other_people'

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# http://blog.topspeedsnail.com/archives/10511
# wget https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml
face_haar = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")

# Walk the image tree, detect faces and save each one as a 64x64 crop.
# A counter gives every crop a unique name: the original reused `filename`,
# which silently overwrote crops when os.walk found the same filename in
# different directories, or when one image contained several faces.
count = 0
for dirpath, dirnames, filenames in os.walk(IMAGE_DIR):
    for filename in filenames:
        if not filename.endswith('.jpg'):
            continue
        image_path = os.path.join(dirpath, filename)
        print('process: ', image_path)
        img = cv2.imread(image_path)
        if img is None:
            # unreadable/corrupt file — skip it instead of crashing in cvtColor
            continue
        gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_haar.detectMultiScale(gray_image, 1.3, 5)
        for face_x, face_y, face_w, face_h in faces:
            face = img[face_y:face_y + face_h, face_x:face_x + face_w]
            face = cv2.resize(face, (64, 64))
            cv2.imshow("img", face)
            cv2.imwrite(os.path.join(OUTPUT_DIR, '%d.jpg' % count), face)
            count += 1
            # ESC aborts the whole extraction run
            key = cv2.waitKey(30) & 0xff
            if key == 27:
                sys.exit(0)
4万多图片,我只提取了1万张脸,应该够使了。
上面是OpenCV做的人脸检测,有了这个数据集又可以反过来训练TensorFlow版本的人脸检测。
斗大熊的脸
给自己拍照1万张,这是我一次拍照最多的一回。
import cv2
import os
import sys

OUTPUT_DIR = './my_faces'

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

face_haar = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
cam = cv2.VideoCapture(0)

# Capture 10000 face crops from the webcam; ESC stops early.
count = 0
while count < 10000:
    print(count)
    ret, img = cam.read()
    if not ret or img is None:
        # frame grab failed — skip instead of crashing in cvtColor
        continue
    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_haar.detectMultiScale(gray_image, 1.3, 5)
    for face_x, face_y, face_w, face_h in faces:
        face = img[face_y:face_y + face_h, face_x:face_x + face_w]
        face = cv2.resize(face, (64, 64))
        cv2.imshow('img', face)
        cv2.imwrite(os.path.join(OUTPUT_DIR, str(count) + '.jpg'), face)
        count += 1
    key = cv2.waitKey(30) & 0xff
    if key == 27:
        break

# release the camera and close the preview window (the original leaked both)
cam.release()
cv2.destroyAllWindows()
在镜头前摇头晃脑、摆pose，戴眼镜、耳机，仰天45度，写代码，呲牙咧嘴，玩手机。。。一定要多样化，直到拍1万张大脸。
训练模型
训练数据有了,下面开始训练。
import tensorflow as tf
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
import random
import sys

# Directories holding the two classes of 64x64 face crops.
my_image_path = 'my_faces'
others_image_path = 'other_people'

# Filled in by read_data(): raw images and their source-directory "labels".
image_data = []
label_data = []
def get_padding_size(image):
    """Return (top, bottom, left, right) border widths that pad *image*
    (an h x w x c array) out to a square of its longest edge."""
    h, w, _ = image.shape
    diff = abs(h - w)
    first = diff // 2
    second = diff - first
    if h < w:
        # too short: grow vertically
        return first, second, 0, 0
    if w < h:
        # too narrow: grow horizontally
        return 0, 0, first, second
    return 0, 0, 0, 0
def read_data(img_path, image_h=64, image_w=64):
    """Load every .jpg directly under *img_path*, pad it to a square and
    resize to (image_h, image_w), appending results to the module-level
    image_data / label_data lists.

    The "label" stored per image is the directory path itself; it is
    mapped to a one-hot vector later by the caller.
    """
    for filename in os.listdir(img_path):
        if filename.endswith('.jpg'):
            filepath = os.path.join(img_path, filename)
            image = cv2.imread(filepath)
            if image is None:
                # unreadable/corrupt file — skip rather than crash below
                continue
            top, bottom, left, right = get_padding_size(image)
            # black-border padding keeps the aspect ratio so the resize
            # does not distort the face
            image_pad = cv2.copyMakeBorder(image, top, bottom, left, right,
                                           cv2.BORDER_CONSTANT, value=[0, 0, 0])
            image = cv2.resize(image_pad, (image_h, image_w))
            image_data.append(image)
            label_data.append(img_path)
read_data(others_image_path)
read_data(my_image_path)

image_data = np.array(image_data)
# One-hot labels: [0, 1] = my face, [1, 0] = someone else.
label_data = np.array([[0, 1] if label == 'my_faces' else [1, 0]
                       for label in label_data])

train_x, test_x, train_y, test_y = train_test_split(
    image_data, label_data,
    test_size=0.05,
    random_state=random.randint(0, 100))

# image (height=64, width=64, channel=3)
train_x = train_x.reshape(train_x.shape[0], 64, 64, 3)
test_x = test_x.reshape(test_x.shape[0], 64, 64, 3)

# normalize pixel values into [0, 1]
train_x = train_x.astype('float32') / 255.0
test_x = test_x.astype('float32') / 255.0

print(len(train_x), len(train_y))
print(len(test_x), len(test_y))

#############################################################
batch_size = 128
num_batch = len(train_x) // batch_size

# input images: 64x64, 3 channels
X = tf.placeholder(tf.float32, [None, 64, 64, 3])
# one-hot class labels
Y = tf.placeholder(tf.float32, [None, 2])

# dropout keep-probabilities (conv layers / dense layer)
keep_prob_5 = tf.placeholder(tf.float32)
keep_prob_75 = tf.placeholder(tf.float32)
def panda_joke_cnn():
    """Build the classification network and return its 2-way logits.

    Architecture: three (conv 3x3 / relu / 2x2 max-pool / dropout) stages,
    one 512-unit dense layer, linear output. Softmax is applied by the
    loss, not here. Uses the module-level placeholders X, keep_prob_5
    and keep_prob_75.
    """
    # stage 1: 3 -> 32 feature maps, pooled 64x64 -> 32x32
    W_c1 = tf.Variable(tf.random_normal([3, 3, 3, 32], stddev=0.01))
    b_c1 = tf.Variable(tf.random_normal([32]))
    conv1 = tf.nn.relu(tf.nn.bias_add(
        tf.nn.conv2d(X, W_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1))
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME')
    conv1 = tf.nn.dropout(conv1, keep_prob_5)

    # stage 2: 32 -> 64 feature maps, pooled 32x32 -> 16x16
    W_c2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
    b_c2 = tf.Variable(tf.random_normal([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(
        tf.nn.conv2d(conv1, W_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, keep_prob_5)

    # stage 3: 64 -> 64 feature maps, pooled 16x16 -> 8x8
    W_c3 = tf.Variable(tf.random_normal([3, 3, 64, 64], stddev=0.01))
    b_c3 = tf.Variable(tf.random_normal([64]))
    conv3 = tf.nn.relu(tf.nn.bias_add(
        tf.nn.conv2d(conv2, W_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob_5)

    # Fully connected layer.
    # After three 2x2 poolings a 64x64 input is 8x8 spatial with 64
    # channels, i.e. 8*8*64 = 4096 inputs. (The original wrote the same
    # value as 8*16*32, which was numerically equal but misleading.)
    W_d = tf.Variable(tf.random_normal([8 * 8 * 64, 512], stddev=0.01))
    b_d = tf.Variable(tf.random_normal([512]))
    dense = tf.reshape(conv3, [-1, W_d.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, W_d), b_d))
    dense = tf.nn.dropout(dense, keep_prob_75)

    # linear output layer -> 2 logits
    W_out = tf.Variable(tf.random_normal([512, 2], stddev=0.01))
    b_out = tf.Variable(tf.random_normal([2]))
    out = tf.add(tf.matmul(dense, W_out), b_out)
    return out
def train_cnn():
    """Train the CNN for up to 50 epochs over the module-level train/test
    split, logging loss/accuracy summaries to ./log; saves the model and
    exits as soon as held-out accuracy exceeds 0.98."""
    output = panda_joke_cnn()

    # TF >= 1.0 requires keyword arguments here; the original positional
    # call softmax_cross_entropy_with_logits(output, Y) raises an error
    # on those versions (and silently swaps logits/labels semantics on
    # some intermediate releases).
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=Y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # fraction of examples whose argmax prediction matches the label
    accuracy = tf.reduce_mean(tf.cast(
        tf.equal(tf.argmax(output, 1), tf.argmax(Y, 1)), tf.float32))

    tf.summary.scalar("loss", loss)
    tf.summary.scalar("accuracy", accuracy)
    merged_summary_op = tf.summary.merge_all()

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(
            './log', graph=tf.get_default_graph())

        for e in range(50):
            for i in range(num_batch):
                batch_x = train_x[i * batch_size:(i + 1) * batch_size]
                batch_y = train_y[i * batch_size:(i + 1) * batch_size]
                _, loss_, summary = sess.run(
                    [optimizer, loss, merged_summary_op],
                    feed_dict={X: batch_x, Y: batch_y,
                               keep_prob_5: 0.5, keep_prob_75: 0.75})
                summary_writer.add_summary(summary, e * num_batch + i)
                print(e * num_batch + i, loss_)

                if (e * num_batch + i) % 100 == 0:
                    # evaluate on the held-out set with dropout disabled
                    acc = accuracy.eval({X: test_x, Y: test_y,
                                         keep_prob_5: 1.0, keep_prob_75: 1.0})
                    print(e * num_batch + i, acc)
                    # save model and stop as soon as it is good enough
                    if acc > 0.98:
                        saver.save(sess, "i_am_a_joke.model",
                                   global_step=e * num_batch + i)
                        sys.exit(0)

train_cnn()
准确率曲线:
下面要做的就是在树莓派上使用模型,代码示例:
# NOTE(review): this is an excerpt — it assumes the definitions from the
# training script (panda_joke_cnn, X, keep_prob_5, keep_prob_75 and the
# imports) are already in scope.
output = panda_joke_cnn()
predict = tf.argmax(output, 1)

saver = tf.train.Saver()
sess = tf.Session()
# restore the most recent checkpoint from the current directory
saver.restore(sess, tf.train.latest_checkpoint('.'))

def is_my_face(image):
    """Return True when the restored model classifies *image* (a 64x64
    BGR crop with values 0-255) as class 1, i.e. "me"."""
    res = sess.run(predict,
                   feed_dict={X: [image / 255.0],
                              keep_prob_5: 1.0, keep_prob_75: 1.0})
    if res[0] == 1:
        return True
    return False

face_haar = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
cam = cv2.VideoCapture(0)

while True:
    _, img = cam.read()
    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_haar.detectMultiScale(gray_image, 1.3, 5)
    for face_x, face_y, face_w, face_h in faces:
        face = img[face_y:face_y + face_h, face_x:face_x + face_w]
        face = cv2.resize(face, (64, 64))
        print(is_my_face(face))
        cv2.imshow('img', face)
        key = cv2.waitKey(30) & 0xff
        if key == 27:
            sys.exit(0)

sess.close()
总结:占用内存100多M,准确率还凑合,先用着。
上面方法需要收集两类数据(自己的脸和其他人的脸),有没有办法只使用自己的脸做One-Class分类呢?这样就不需收集其他人的脸了。感觉上可以用autoencoder。
如要转载,请保持本文完整,并注明作者@斗大的熊猫和本文原始地址: http://blog.topspeedsnail.com/archives/10931