1. 数据处理
img_urls.txt:头像图片 URL 文本文件,每行以制表符(\t)分隔,第一列为用户 ID(uid),第二列为图片 URL。
# coding: utf8
import re
import os
import sys
import datetime
import codecs
import urllib.request
from pathlib import Path
from multiprocessing import Pool
import numpy as np
import cv2 as cv
from PIL import Image
import face_recognition
if __name__ == '__main__':
    # Step 1: download the images listed in the URL file.
    img_urls_file = "img_urls.txt"
    local_img_dir = "temp_images/"  # local directory the images are saved to
    dataset_length = download_img(img_urls_file, local_img_dir)

    # Step 2: split the downloaded images into a training set and a test set.
    trainset_size = 10000
    local_data = "temp_images/"
    train_data, test_data = "train_images/", "test_images/"
    split_data(
        dataset_length,
        trainset_size,
        local_data,
        train_data,
        test_data,
    )
1.1 下载图片
def download_img(img_urls_file, local_img_dir):
    """Download every image listed in a tab-separated URL file.

    Each line is stripped and split on tabs; lines with fewer than two
    fields are ignored and the second field is used as the image URL.
    Images are written to ``local_img_dir + "<index>.png"``, where the index
    starts at 0 and advances once per accepted line.

    Args:
        img_urls_file: Path of the text file containing the URLs.
        local_img_dir: Path prefix for the saved images (e.g. "temp_images/").

    Returns:
        The number of lines for which a download was attempted.  The
        downloader does not report failures, so some of the numbered files
        may be missing.
    """
    # Make sure the target directory exists; otherwise every write fails.
    target_dir = os.path.dirname(local_img_dir)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    i = 0
    # The same file is read as UTF-8 elsewhere in this project, so be explicit
    # instead of depending on the locale's default encoding.
    with open(img_urls_file, encoding="utf-8") as f:
        for line in f:
            item_list = line.strip().split("\t")
            if len(item_list) < 2:
                continue
            img_name = local_img_dir + str(i) + ".png"
            download_a_img(item_list[1], img_name)
            i += 1
    return i
def download_a_img(img_url, img_name, api_token="ii"):
    """Download one image and save it to ``img_name`` (best effort).

    The request carries an ``Authorization: Bearer <api_token>`` header.
    The file is written only when the server answers with status 200.  Any
    failure is logged as a warning and the function returns normally, so a
    single bad URL never aborts a batch download.

    Args:
        img_url: URL of the image.
        img_name: Local path the image bytes are written to.
        api_token: Bearer token sent with the request.

    Returns:
        None.
    """
    import logging

    header = {"Authorization": "Bearer " + api_token}
    try:
        request = urllib.request.Request(img_url, headers=header)
        # The context manager closes the connection even when reading fails.
        with urllib.request.urlopen(request) as response:
            if response.getcode() != 200:
                return
            payload = response.read()
        # Read the full body before opening the file so that a failed
        # transfer does not leave a truncated image behind.
        with open(img_name, "wb") as f:
            f.write(payload)
    except Exception as exc:  # best effort, but let Ctrl-C / SystemExit through
        logging.getLogger(__name__).warning(
            "failed to download %s: %s", img_url, exc
        )
1.2 数据集分割
def split_data(dataset_length, trainset_size, local_data, train_data, test_data):
    """Resize the downloaded images and split them into train and test sets.

    Images named ``<i>.png`` for ``i`` in ``range(dataset_length)`` are read
    from the ``local_data`` prefix, resized to 28x28, converted to 8-bit
    grayscale (mode "L") and saved as PNG.  The first ``trainset_size``
    indices go under ``train_data``; the rest go under ``test_data``.

    Args:
        dataset_length: Number of image indices to process.
        trainset_size: Indices below this value belong to the training set.
        local_data: Path prefix of the source images (e.g. "temp_images/").
        train_data: Path prefix for training images.
        test_data: Path prefix for test images.

    Returns:
        None.
    """
    import logging

    width, height = 28, 28

    # Create the output directories up front so that saving cannot fail
    # merely because they do not exist yet.
    for prefix in (train_data, test_data):
        out_dir = os.path.dirname(prefix)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    for i in range(dataset_length):
        img_name = str(i) + ".png"
        fout = (train_data if i < trainset_size else test_data) + img_name
        try:
            with Image.open(local_data + img_name) as src_img:
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                out_img = src_img.resize((width, height), Image.LANCZOS).convert('L')
        except OSError as exc:
            # Missing or unreadable source image: skip it rather than abort
            # the whole split.
            logging.getLogger(__name__).warning(
                "skipping %s: %s", local_data + img_name, exc
            )
            continue
        out_img.save(fout, 'png')
2. 人脸识别
显示图片
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
def plot_image(img_path='test.jpg'):
    """Show the image stored at ``img_path`` in a figure titled "Query Image".

    Args:
        img_path: Path of the image file to display.
    """
    pixels = mpimg.imread(img_path)
    plt.title("Query Image")
    plt.imshow(pixels)
    plt.show()
并行批量识别图片中是否有人脸。
# -*- coding: UTF-8 -*-
import codecs
from multiprocessing import Pool
def parallel_detection(input_path, output_path, cpus=26):
    """Run face detection over a URL file in parallel and write the results.

    The input file is read in small chunks.  Each chunk is cut into at most
    ``cpus`` slices which are handed to worker processes running
    ``detection_face_in_batch``.  Every result line is printed and written
    to ``output_path`` (UTF-8).

    Args:
        input_path: Path of the tab-separated URL file.
        output_path: Path of the result file to create/overwrite.
        cpus: Number of worker processes.
    """
    # 1. Count the lines in the URL file.
    total_lines_cnt = iter_count(input_path)
    # 2. Lazily read the file chunk by chunk.
    texts = read_data_by_chunk(total_lines_cnt, input_path)

    # One pool is reused for all chunks instead of being rebuilt per chunk.
    pool = Pool(cpus)
    try:
        with codecs.open(output_path, "w", "utf-8") as fw:
            for chunk in texts:
                if not chunk:
                    continue
                # Ceiling division, at least 1.  Floor division gave a step
                # of 0 whenever a chunk had fewer lines than workers, and
                # range() rejects a zero step.
                step = max(1, -(-len(chunk) // cpus))
                tmp = [chunk[i:i + step] for i in range(0, len(chunk), step)]
                # 3. Detect faces for all slices of this chunk.
                results = pool.map(detection_face_in_batch, tmp)
                # 4. Emit the result lines in input order.
                for r in results:
                    for line in r:
                        print(line)
                        fw.write(line)
    finally:
        pool.close()
        pool.join()
if __name__ == "__main__":
    # Check every image URL in the input file for a face in parallel;
    # the label is 1 when a face is found and 0 otherwise.
    input_path, output_path = "img_urls.txt", "result.txt"
    parallel_detection(input_path, output_path)
    print('------finished!!!------')
1. 计算文件中url数目
def iter_count(file_name):
    """Return the number of text lines in ``file_name``.

    The file is read in 1 MiB binary blocks and newline bytes are counted,
    so no decoding is done and the result does not depend on the locale
    encoding.  A final line without a trailing newline is counted as a
    line.  Only ``\\n`` terminates a line; a lone ``\\r`` does not.

    Args:
        file_name: Path of the file to inspect.

    Returns:
        The line count as an int (0 for an empty file).
    """
    buffer = 1024 * 1024
    line_total = 0
    last_block = b""
    with open(file_name, "rb") as f:
        while True:
            block = f.read(buffer)
            if not block:
                break
            line_total += block.count(b"\n")
            last_block = block
    # A non-empty file whose last byte is not a newline still ends with a
    # line that the newline count above has missed.
    if last_block and not last_block.endswith(b"\n"):
        line_total += 1
    return line_total
2. 划分chunk
import codecs
def read_data_by_chunk(total_lines_cnt, file_name, trunk=2):
    """Yield the stripped lines of a UTF-8 text file in small lists.

    Args:
        total_lines_cnt: Kept for backward compatibility; no longer needed.
            The last, possibly shorter, chunk is now emitted when the file
            ends instead of when a line counter matches this value, so no
            lines are lost if the count is off.
        file_name: Path of the UTF-8 text file.
        trunk: Maximum number of lines per yielded chunk (default 2).

    Yields:
        Lists of at most ``trunk`` stripped lines, in file order.

    Raises:
        ValueError: If ``trunk`` is smaller than 1.
    """
    if trunk < 1:
        raise ValueError("trunk must be >= 1")
    texts = []
    # The context manager closes the file even if the consumer stops early.
    with codecs.open(file_name, "r", "utf-8") as fr:
        for line in fr:
            texts.append(line.strip())
            if len(texts) >= trunk:
                yield texts
                texts = []
    if texts:
        yield texts
3. 批量检测,并写入结果
import datetime
def detection_face_in_batch(texts):
    """Label a small batch of ``uid<TAB>url`` lines with a face flag.

    Lines that do not contain at least two tab-separated fields are skipped.
    For the others, the second field is passed to ``is_detection_face`` and
    one output line is built: uid, label, current local timestamp
    (``%Y-%m-%d %H:%M:%S``) and URL, joined by tabs and ending in a newline.

    Args:
        texts: Iterable of input lines.

    Returns:
        List of formatted result lines, in input order.
    """
    rows = []
    for raw in texts:
        fields = raw.strip().split("\t")
        if len(fields) >= 2:
            uid, img_url = fields[0], fields[1]
            label = is_detection_face(img_url)
            stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            rows.append("\t".join((uid, str(label), str(stamp), img_url)) + "\n")
    return rows
4. 检测图片中是否有人脸
import urllib.request
import face_recognition
def is_detection_face(img_url, api_token='ii'):
    """Fetch an image by URL and report whether it contains a face.

    The request carries an ``Authorization: Bearer <api_token>`` header.
    Detection uses ``face_recognition.face_locations`` with the "hog" model
    and one upsampling pass.

    Args:
        img_url: URL of the image to check.
        api_token: Bearer token sent with the request.

    Returns:
        1 if at least one face was located, 0 if none was, and None when
        the image could not be fetched or processed (the failure is logged).
    """
    import logging

    header = {"Authorization": "Bearer " + api_token}
    try:
        request = urllib.request.Request(img_url, headers=header)
        # The context manager guarantees the connection is released.
        with urllib.request.urlopen(request) as response:
            if response.getcode() != 200:
                return None
            image = face_recognition.load_image_file(response)
        face_locations = face_recognition.face_locations(
            image, number_of_times_to_upsample=1, model="hog")
        return 1 if len(face_locations) > 0 else 0
    except Exception as exc:  # best effort, but let Ctrl-C / SystemExit through
        logging.getLogger(__name__).warning(
            "face detection failed for %s: %s", img_url, exc
        )
        return None
def is_detection_face(img_path):
    """Report whether the image file at ``img_path`` contains a face.

    NOTE: this file also defines a URL-based function with the same name;
    within one module the definition that comes last is the one in effect.

    Args:
        img_path: Path of a local image, e.g. "1.png".

    Returns:
        1 if at least one face location was found, otherwise 0.
    """
    pixels = face_recognition.load_image_file(img_path)
    # A result looks like [(10, 81, 46, 45)]; per the original note these are
    # the top-left and bottom-right corner coordinates of the face box.
    boxes = face_recognition.face_locations(
        pixels, number_of_times_to_upsample=1, model="hog")
    if len(boxes) > 0:
        return 1
    return 0
3. 检测人脸年龄性别
11
#-*- coding: UTF-8 -*-
import re
import os
import codecs
import sys
import urllib.request
import numpy as np
import cv2 as cv
import datetime
from PIL import Image
import face_recognition
from pathlib import Path
from multiprocessing import Pool
import argparse
import cv2
import mxnet as mx
from utils import face_preprocess
from utils.mtcnn_detector import MtcnnDetector
def infer(imgg):
    """Predict gender and age for the first face found in an encoded image.

    Model options are parsed from the process command line (with the
    defaults below); the GPU id is then forced to -1 (CPU) and the image
    argument is replaced by ``imgg``.  The model is loaded on every call.

    Args:
        imgg: Encoded image bytes (decoded with ``cv2.imdecode``).

    Returns:
        ``"<gender>\\t<age>"`` for the first detected face, or ``"-1\\t0"``
        when no face is found or detection fails.  Per the note in this
        file's driver code, gender 0 is female and 1 is male.
    """
    import logging

    parser = argparse.ArgumentParser()
    parser.add_argument('--image_size', default='112,112', help='models input size.')
    parser.add_argument('--image', default='test.jpg', help='infer image path.')
    parser.add_argument('--model', default='model/model,200', help='path to load model.')
    parser.add_argument('--mtcnn_model', default='mtcnn-model', help='path to load model.')
    parser.add_argument('--gpu', default=0, type=int, help='gpu id')
    args = parser.parse_args()
    args.gpu = -1
    args.image = imgg

    # Load the detector and the gender/age network.
    model = FaceAgeGenderModel(args)

    # Decode the in-memory image; imdecode yields None for undecodable data.
    img = cv2.imdecode(np.frombuffer(imgg, np.uint8), cv2.IMREAD_COLOR)

    # Detect faces.  Failures are logged and treated as "no face".
    faces = []
    if img is not None:
        try:
            detected = model.get_faces(img)
        except Exception as exc:
            logging.getLogger(__name__).warning("face detection failed: %s", exc)
        else:
            # get_faces may return None instead of a (faces, boxes, points)
            # triple when the detector finds nothing.
            if detected is not None:
                faces = detected[0]

    if len(faces) == 0:
        return "-1" + "\t" + "0"
    gender, age = model.get_ga(faces[0])
    return str(gender) + "\t" + str(age)
def detection_age_and_gender(img_url, api_token='ii'):
    """Download an image by URL and print its predicted gender and age.

    The request carries an ``Authorization: Bearer <api_token>`` header.
    On a 200 response the image bytes are passed to ``infer`` and the
    resulting string is printed.

    Args:
        img_url: URL of the image.
        api_token: Bearer token sent with the request.

    Returns:
        The string produced by ``infer``, or None when the image could not
        be fetched or processed (the failure is logged, not raised).
    """
    import logging

    header = {"Authorization": "Bearer " + api_token}
    try:
        request = urllib.request.Request(img_url, headers=header)
        # Close the connection before running the (slow) inference.
        with urllib.request.urlopen(request) as response:
            if response.getcode() != 200:
                return None
            payload = response.read()
        result = infer(payload)
    except Exception as exc:  # best effort, but let Ctrl-C / SystemExit through
        logging.getLogger(__name__).warning(
            "age/gender detection failed for %s: %s", img_url, exc
        )
        return None
    print(result)
    return result
if __name__ == "__main__":
    # Run age/gender detection on a single remote avatar.
    img_url = 'https://cdnavatar.youni.im/'
    detection_age_and_gender(
        img_url,
    )
    # infer("boy.png")  # gender output: 0 = female, 1 = male
12
class FaceAgeGenderModel:
    """Face detector (MTCNN) plus an MXNet network predicting gender and age."""

    def __init__(self, args):
        """Load the gender/age network and the MTCNN detector.

        Args:
            args: Namespace-like object providing ``gpu`` (int; negative
                selects the CPU), ``image_size`` ("H,W" string), ``model``
                ("prefix,epoch" string; empty skips loading the network)
                and ``mtcnn_model`` (detector model folder).
        """
        self.args = args
        ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()
        _vec = args.image_size.split(',')
        assert len(_vec) == 2
        image_size = (int(_vec[0]), int(_vec[1]))
        self.model = None
        if len(args.model) > 0:
            self.model = self.get_model(ctx, image_size, args.model, 'fc1')
        self.det_minsize = 50
        self.det_threshold = [0.6, 0.7, 0.8]
        self.image_size = image_size
        detector = MtcnnDetector(model_folder=args.mtcnn_model, ctx=ctx, num_worker=1,
                                 accurate_landmark=True, threshold=self.det_threshold)
        print("加载模型:%s" % args.mtcnn_model)
        self.detector = detector

    def get_model(self, ctx, image_size, model_str, layer):
        """Load a checkpoint and return a module truncated at ``layer``.

        Args:
            ctx: MXNet context the module runs on.
            image_size: (height, width) of the network input.
            model_str: "prefix,epoch" identifying the checkpoint.
            layer: Name of the layer whose output becomes the module output.

        Returns:
            A bound ``mx.mod.Module`` for batches of one 3-channel image.
        """
        _vec = model_str.split(',')
        assert len(_vec) == 2
        prefix = _vec[0]
        epoch = int(_vec[1])
        print('loading', prefix, epoch)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers[layer + '_output']
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
        model.set_params(arg_params, aux_params)
        return model

    def get_faces(self, face_img):
        """Detect faces and prepare one network input batch per face.

        Args:
            face_img: Image passed to the MTCNN detector; it is later
                converted BGR->RGB, so BGR channel order is expected.

        Returns:
            A ``(faces, bboxes, pointses)`` tuple of equally long lists:
            ``mx.io.DataBatch`` inputs, the first four bbox values per face,
            and the landmarks reshaped to 5 rows x 2 columns.  All three
            lists are empty when nothing is detected, including when the
            detector returns None, so callers can always unpack the result.
        """
        ret = self.detector.detect_face(face_img)
        if ret is None:
            # Same shape as the "no boxes" case below, not a bare None.
            return [], [], []
        bbox, points = ret
        if bbox.shape[0] == 0:
            return [], [], []
        bboxes = []
        pointses = []
        faces = []
        for box, landmarks in zip(bbox, points):
            b = box[0:4]
            bboxes.append(b)
            p = landmarks.reshape((2, 5)).T
            pointses.append(p)
            # NOTE(review): the crop size is fixed to 112x112 here, whatever
            # self.image_size says -- confirm before using another size.
            nimg = face_preprocess.preprocess(face_img, b, p, image_size='112,112')
            nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
            aligned = np.transpose(nimg, (2, 0, 1))  # HWC -> CHW
            input_blob = np.expand_dims(aligned, axis=0)  # add batch axis
            data = mx.nd.array(input_blob)
            faces.append(mx.io.DataBatch(data=(data,)))
        return faces, bboxes, pointses

    def get_ga(self, data):
        """Run the network on one face batch and decode gender and age.

        Args:
            data: Input batch as produced by ``get_faces``.

        Returns:
            ``(gender, age)``: gender is the argmax over the first two
            outputs; age is the number of the following 100 output pairs
            whose second value is the larger one.
        """
        self.model.forward(data, is_train=False)
        ret = self.model.get_outputs()[0].asnumpy()
        g = ret[:, 0:2].flatten()
        gender = np.argmax(g)
        a = ret[:, 2:202].reshape((100, 2))
        a = np.argmax(a, axis=1)
        age = int(sum(a))
        return gender, age
4. 图片文本检测
os.system('tesseract 22.png 22 --oem 1 -l chi_sim+eng')
22.png: 待检测的图片
22: 检测文本结果写出文件
--oem 1: 1代表用lstm引擎识别, 0表示用传统引擎识别
-l chi_sim+eng: 指定中文字库和英文字库。
--psm 7: 告诉 Tesseract 图片是一行文本,这个参数可以减少识别错误率;默认为 3。
具体用法可参考: 一个流行的开源OCR软件Tesseract
import os
def is_detection_text(img_path):
    """Run Tesseract OCR on an image and report whether any text was found.

    Tesseract is invoked with the LSTM engine (``--oem 1``) and the
    ``chi_sim+eng`` languages.  It writes its output to ``<base>.txt``,
    where ``<base>`` is ``img_path`` without its extension.  That file is
    read back, spaces and line breaks are removed, and the text is printed.

    Args:
        img_path: Path of the image to analyse.

    Returns:
        1 if the recognised text is non-empty, otherwise 0.

    Raises:
        FileNotFoundError: If the ``tesseract`` executable is missing or
            it produced no result file.
    """
    import subprocess

    # splitext removes only the final extension, so "./dir.v2/22.png" maps
    # to "./dir.v2/22" and not to an empty string.
    result_file_name = os.path.splitext(img_path)[0]
    # An argument list bypasses the shell: paths with spaces or shell
    # metacharacters are passed through verbatim and cannot inject commands.
    subprocess.run(
        ['tesseract', img_path, result_file_name, '--oem', '1', '-l', 'chi_sim+eng'],
        check=False,
    )
    # Tesseract writes UTF-8; do not depend on the locale encoding.
    with open(result_file_name + '.txt', 'r', encoding='utf-8') as f:
        result = "".join(line.strip().replace(' ', '') for line in f)
    print(result)  # the recognised text
    return 1 if len(result) > 0 else 0
# Example: check whether '22.png' contains any text (prints 1 or 0).
img_path = '22.png'
# print is a function in Python 3; the statement form is a SyntaxError.
print(is_detection_text(img_path))