一、HLS手写体识别部分
1、系统分析
1、手写体输入为28x28的黑白图片,所以输入为784个
2、输出为识别0-9的数字的概率,所以有10个输出
3、输入只能是-1~1的小数,主要是防止计算溢出
4、通过HLS 编写全连接神经网络,并传入权重参数和偏置参数文件
2.2 编写C语言的全连接算子
头文件导入 :
#include <stdio.h>
#include "HLS/hls.h"
#include "input_0.h"//十幅图片
#include "input_1.h"
#include "input_2.h"
#include "input_3.h"
#include "input_4.h"
#include "input_5.h"
#include "input_6.h"
#include "input_7.h"
#include "input_8.h"
#include "input_9.h"
#include "layer1_bias.h" //第一层偏置常数
#include "layer1_weight.h" //第一层权重
#include "layer2_bias.h" //第二层偏置常数
#include "layer2_weight.h" //第二层权重值
4、编写代码
Slave Interfaces
#include <HLS/hls.h>
#include <stdio.h>
/* Minimal HLS component: returns the product of its two integer arguments.
 * The component is tagged with hls_avalon_slave_component. */
hls_avalon_slave_component
component int dut(int a, int b)
{
    int product = a * b;
    return product;
}
/* Testbench: invokes dut once with 2 and 3 and prints the returned value. */
int main()
{
    const int lhs = 2;
    const int rhs = 3;
    int y = dut(lhs, rhs);

    printf("y=%d", y);
    return 0;
}
hls_avalon_slave_register_argument
#include <HLS/hls.h>
#include <stdio.h>
/* Same multiplier as the plain slave-component example, except that argument
 * b is additionally tagged with hls_avalon_slave_register_argument. */
hls_avalon_slave_component
component int dut(
    int a,
    hls_avalon_slave_register_argument int b)
{
    int product = a * b;
    return product;
}
/* Testbench: invokes dut once with 2 and 3 and prints the returned value. */
int main()
{
    const int lhs = 2;
    const int rhs = 3;
    int y = dut(lhs, rhs);

    printf("y=%d", y);
    return 0;
}
slave_memory_argument
#include <HLS/hls.h>
#include <HLS/stdio.h>
/* Dot product of two 5-element integer vectors. Both vectors are declared as
 * slave memory arguments sized for five ints. */
hls_avalon_slave_component
component int dut(
    hls_avalon_slave_memory_argument(5*sizeof(int)) int *a,
    hls_avalon_slave_memory_argument(5*sizeof(int)) int *b
)
{
    int sum = 0;

    for (int idx = 0; idx < 5; ++idx) {
        sum += a[idx] * b[idx];
    }
    return sum;
}
/* Testbench: feeds {1,2,3,4,5} to both inputs of dut and prints the result. */
int main()
{
    int a[5] = {1, 2, 3, 4, 5};
    int b[5] = {1, 2, 3, 4, 5};
    int sum = dut(a, b);

    printf("sum=%d", sum);
    return 0;
}
全连接代码
#include <stdio.h>
#include "HLS/hls.h"
#include "input_0.h"//十幅图片
#include "input_1.h"
#include "input_2.h"
#include "input_3.h"
#include "input_4.h"
#include "input_5.h"
#include "input_6.h"
#include "input_7.h"
#include "input_8.h"
#include "input_9.h"
#include "layer1_bias.h" //第一层偏置常数
#include "layer1_weight.h" //第一层权重
#include "layer2_bias.h" //第二层偏置常数
#include "layer2_weight.h" //第二层权重值
/*
 * Two-layer fully connected classifier (784 -> 64 -> 10).
 *
 * img : 784 input values (one flattened 28x28 image)
 * b1  : 64 first-layer biases
 * w1  : 784*64 first-layer weights; the weight from input j to neuron i is
 *       read from w1[i + j*64]
 * b2  : 10 second-layer biases
 * w2  : 64*10 second-layer weights; the weight from hidden unit j to output i
 *       is read from w2[i + j*10]
 *
 * Returns the index (0..9) of the largest output value. Ties resolve to the
 * lowest index.
 */
hls_avalon_slave_component component
int my_predit(
    hls_avalon_slave_memory_argument(784*sizeof(float)) float *img,
    hls_avalon_slave_memory_argument(64*sizeof(float)) float *b1,
    hls_avalon_slave_memory_argument(784*64*sizeof(float)) float *w1,
    hls_avalon_slave_memory_argument(10*sizeof(float)) float *b2,
    hls_avalon_slave_memory_argument(64*10*sizeof(float)) float *w2){
    float res1[64] = {0};   /* first-layer outputs  */
    float res2[10] = {0};   /* second-layer outputs */

    /* Layer 1: res1[i] = sum_j img[j] * w1[i + j*64] + b1[i] */
    for (int i = 0; i < 64; i++)
    {
        for (int j = 0; j < 784; j++)
        {
            res1[i] = res1[i] + img[j] * w1[i + j*64];
        }
        res1[i] += b1[i];
    }

    /* Layer 2: res2[i] = sum_j res1[j] * w2[i + j*10] + b2[i] */
    for (int i = 0; i < 10; i++)
    {
        for (int j = 0; j < 64; j++)
        {
            res2[i] = res2[i] + res1[j] * w2[i + j*10];
        }
        res2[i] += b2[i];
    }

    /* Argmax over the ten outputs. The search is seeded with element 0 so the
     * result is well defined even when no output is positive; seeding the
     * maximum with 0 left the index uninitialised in that case. */
    float temp = res2[0];
    int res3 = 0;
    for (int i = 1; i < 10; i++)
    {
        if (res2[i] > temp)
        {
            temp = res2[i];
            res3 = i;
        }
    }
    return res3;
}
/* Testbench: runs my_predit on each of the ten sample images with the weights
 * and biases from the included headers, and prints one line per image. */
int main()
{
    /* Table of pointers to the ten sample images. */
    float *samples[10] = {
        input_0, input_1, input_2, input_3, input_4,
        input_5, input_6, input_7, input_8, input_9
    };
    int idx = 0;

    while (idx < 10) {
        int res = my_predit(samples[idx], layer1_bias, layer1_weight,
                            layer2_bias, layer2_weight);
        printf("input_%d.h预测结果为:%d\n", idx, res);
        idx++;
    }
    return 0;
}
2 添加神经网络IP到工程并连线后编译运行
设计软件
代码
/*
* full.c
*
* Created on: 2022年7月27日
* Author: 药石无医
*/
#include "layer1_bias.h"
#include "layer1_weight.h"
#include "layer2_bias.h"
#include "layer2_weight.h"
#include "input_0.h"//十幅图片
#include "input_1.h"
#include "input_2.h"
#include "input_3.h"
#include "input_4.h"
#include "input_5.h"
#include "input_6.h"
#include "input_7.h"
#include "input_8.h"
#include "input_9.h"
//gcc标准头文件
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <string.h>
//HPS厂家提供的底层定义头文件
#define soc_cv_av //开发平台Cyclone V 系列
#include "hwlib.h"
#include "socal/socal.h"
#include "socal/hps.h"
//与用户具体的HPS 应用系统相关的硬件描述头文件
#include "hps_0.h"
#define HW_REGS_BASE (ALT_STM_OFST) //HPS外设地址段基地址
#define HW_REGS_SPAN (0x04000000) //HPS外设地址段地址空间 64MB大小
#define HW_REGS_MASK (HW_REGS_SPAN - 1) //HPS外设地址段地址掩码
static volatile unsigned long long *dout = NULL;
static float *img_virtual_base = NULL;
static float *b1_virtual_base = NULL;
static float *b2_virtual_base = NULL;
static float *w1_virtual_base = NULL;
static float *w2_virtual_base = NULL;
/* Converts a component base offset on the lightweight HPS-to-FPGA bridge into
 * an address inside the mapped register window. */
static void *full_lw_addr(void *mapped, unsigned component_base)
{
    return (char *)mapped
        + ((unsigned)(ALT_LWFPGASLVS_OFST + component_base) & (unsigned)(HW_REGS_MASK));
}

/*
 * Opens /dev/mem, maps HW_REGS_SPAN bytes starting at HW_REGS_BASE and fills
 * in the file-scope pointers (dout, img/b1/b2/w1/w2_virtual_base) with the
 * addresses of the predictor component's CRA and memory interfaces.
 *
 * virtual_base : receives the start of the mapping, stored as an int.
 *                NOTE(review): this only round-trips on a target whose
 *                pointers fit in an int (e.g. the 32-bit Cyclone V HPS).
 *
 * Returns the /dev/mem file descriptor on success, or -1 if the device cannot
 * be opened or the mapping fails (the descriptor is closed in the latter case).
 */
int full_init(int *virtual_base){
    int fd;
    void *virtual_space;

    /* Open /dev/mem for physical memory access. */
    if ((fd = open("/dev/mem", (O_RDWR | O_SYNC))) == -1) {
        printf("can't open the file");
        return fd;
    }

    /* Map the peripheral window into user space. */
    virtual_space = mmap(NULL, HW_REGS_SPAN, (PROT_READ | PROT_WRITE), MAP_SHARED, fd, HW_REGS_BASE);
    if (virtual_space == MAP_FAILED) {
        /* Without this check every pointer below would be derived from MAP_FAILED. */
        printf("mmap failed\n");
        close(fd);
        return -1;
    }

    /* Resolve each interface of the component inside the mapping. */
    dout = (volatile unsigned long long *)
        full_lw_addr(virtual_space, PREDIT_0_MY_PREDIT_INTERNAL_INST_AVS_CRA_BASE);
    b1_virtual_base = (float *)
        full_lw_addr(virtual_space, PREDIT_0_MY_PREDIT_INTERNAL_INST_AVS_B1_BASE);
    b2_virtual_base = (float *)
        full_lw_addr(virtual_space, PREDIT_0_MY_PREDIT_INTERNAL_INST_AVS_B2_BASE);
    w1_virtual_base = (float *)
        full_lw_addr(virtual_space, PREDIT_0_MY_PREDIT_INTERNAL_INST_AVS_W1_BASE);
    w2_virtual_base = (float *)
        full_lw_addr(virtual_space, PREDIT_0_MY_PREDIT_INTERNAL_INST_AVS_W2_BASE);
    img_virtual_base = (float *)
        full_lw_addr(virtual_space, PREDIT_0_MY_PREDIT_INTERNAL_INST_AVS_IMG_BASE);

    /* Explicit conversion: the interface hands the mapping back as an int. */
    *virtual_base = (int)(size_t)virtual_space;
    return fd;
}
/*
 * HPS-side driver: maps the FPGA component, loads weights and biases into its
 * slave memories once, then for each of the ten sample images copies the
 * image in, starts the component through its CRA registers, waits for it to
 * finish and prints the returned class.
 *
 * Returns 1 if the hardware mapping could not be set up, 0 otherwise.
 */
int main(){
    int fd, virtual_base, i;

    fd = full_init(&virtual_base);
    if (fd < 0) {
        /* Nothing was mapped; touching the register pointers would fault. */
        return 1;
    }

    float *image[10] = {input_0,input_1,input_2,input_3,input_4,input_5,input_6,input_7,input_8,input_9};

    /* Load weights and biases once; they are shared by all images. */
    memcpy(w1_virtual_base, layer1_weight, 784*64*sizeof(float));
    memcpy(b1_virtual_base, layer1_bias, 64*sizeof(float));
    memcpy(w2_virtual_base, layer2_weight, 64*10*sizeof(float));
    memcpy(b2_virtual_base, layer2_bias, 10*sizeof(float));

    /* NOTE(review): the register roles below (word 0 busy, 1 start,
     * 2 interrupt enable, 3 interrupt status, 4 return data) are presumed
     * from the access pattern -- confirm against the generated CSR header. */
    for (i = 0; i < 10; i++)
    {
        memcpy(img_virtual_base, image[i], 784*sizeof(float));
        while ((*(dout + 0) & (unsigned)1) != 0);   /* wait while bit 0 of word 0 is set */
        *(dout + 2) = 1;
        *(dout + 3) = 1;
        *(dout + 1) = 1;
        while ((*(dout + 3) & 0x2) == 0);           /* wait for bit 1 of word 3 */
        /* The register is 64-bit wide; cast so the argument matches %d. */
        printf("input:%d 预测结果:%d \n", i, (int)*(dout + 4));
        *(dout + 1) = 0;
    }

    /* Unmap the register window. full_init returned the address as an int,
     * so it is converted back to a pointer here. */
    if (munmap((void *)(size_t)(unsigned)virtual_base, HW_REGS_SPAN) == -1) {
        printf("取消映射失败..\n");
    }

    /* Close /dev/mem exactly once, whether or not munmap succeeded. */
    close(fd);
    return 0;
}
运行结果:
二、口罩识别部分
1、环境准备
一、工具及环境要求
工具
labelimg
AI Studio
YOLO2COCO
PaddleUtils
paddleyolo
本地环境要求
openvino2022.2.0
paddle2onnx1.0.5
paddlepaddle2.4.2
opencv-python4.2.0.32
onnx1.11.0
tensorflow2.9.1
安装相关代码
workon 环境 激活环境
pip install labelimg 安装labelimg
pip install openvino-dev[onnx,tensorflow]==2022.2.0
pip install paddle2onnx==1.0.5 -i https://pypi.tuna.tsinghua.edu.cn/simple/
2、数据准备
创建个项目文件夹
图片放images里面
启动labelimg打标签
打完标签后,编写代码生成train.txt,val.txt
import os
def train_val(labels_path, data_path, ratio=0.3):
    """Split labelled images into ``train.txt`` and ``val.txt``, per class.

    Each label file is classified by the first character of its content:
    ``"0"`` is counted as nomask, anything else as mask. Within each class,
    the first ``int(count * ratio)`` files (in sorted name order) go to
    ``val.txt`` and the rest to ``train.txt``. Both list files are written to
    the current working directory.

    Args:
        labels_path (str): Directory containing the YOLO ``.txt`` label files.
        data_path (str): Dataset root; its last three backslash-separated
            components plus ``\\images`` form the prefix of every written path.
        ratio (float): Fraction of each class assigned to the validation list.
    """
    # Path prefix in the layout expected by the YOLO2COCO tool.
    image_dir = "\\".join(data_path.split("\\")[-3:]) + "\\images"

    # Single pass: classify every label file and count each class.
    entries = []
    totals = {True: 0, False: 0}
    for txt in sorted(os.listdir(labels_path)):
        # labelImg's YOLO export drops a classes.txt next to the labels; it is
        # not an annotation and has no matching image.
        if not txt.endswith(".txt") or txt == "classes.txt":
            continue
        with open(os.path.join(labels_path, txt), "r") as f_txt:
            first_char = f_txt.read(1)
        if not first_char:
            # Empty label file: no class to read, so skip it instead of crashing.
            continue
        is_nomask = first_char == "0"
        entries.append((txt, is_nomask))
        totals[is_nomask] += 1

    val_quota = {key: int(count * ratio) for key, count in totals.items()}
    seen = {True: 0, False: 0}
    with open("train.txt", "w") as f_train, open("val.txt", "w") as f_val:
        for txt, is_nomask in entries:
            seen[is_nomask] += 1
            # splitext keeps dotted stems intact ("a.b.txt" -> "a.b").
            line = image_dir + "\\" + os.path.splitext(txt)[0] + ".jpg" + "\n"
            # "<=" gives val exactly int(count * ratio) files; comparing the
            # 1-based counter with ">=" left val one file short.
            if seen[is_nomask] <= val_quota[is_nomask]:
                f_val.write(line)
            else:
                f_train.write(line)
if __name__ == "__main__":
    # The dataset lives in the "mask" folder under the current directory,
    # with its label files in the "labels" subfolder.
    mask_root = os.path.join(os.getcwd(), 'mask')
    train_val(
        labels_path=os.path.join(mask_root, "labels"),
        data_path=mask_root,
        ratio=0.3,
    )
数据转换
python yolov5_2_coco.py --dir_path E:\paddle_openvino\YOLO2COCO\dataset\yolo_mask
模型训练
利用百度飞桨分别进行以下操作
一、创建数据集
因为数据集过大,启动一个项目,然后直接上传是不行的,所以先创建一个数据集,再在数据集基础上创建项目
二、创建项目
三、模型配置
在configs下面有很多模型,文件夹名就是模型名字,除了可以使用ppyoloe,还有yolov5,yolov8等。
四、模型训练
因为我们是基础版本环境,没有GPU,所以需要切换环境。如果你像下面这样切换环境,大概率你是切换不了的,因为文件过多过大。
训练成功后导出模型
模型转换
1、可视化模型
模型剪枝
模型剪枝的目的就是只保留从输入到两个输出 concat_14.tmp_0、tmp_16 为止的部分,后面的节点都删掉。在模型剪枝工具的paddle目录下打开cmd。
剪枝后
模型转化
先把paddle模型转换为onnx,需要在环境里面提前安装paddle2onnx。执行以下命令。
paddle2onnx --model_dir export_model --model_filename model.pdmodel --params_filename model.pdiparams --input_shape_dict "{'image':[1,3,640,640]}" --opset_version 11 --save_file ppyoloe_crn_s_80.onnx
模型推理
增加一个inference.ipynb用于编写推理代码,注意放置位置。
推理代码
from openvino.runtime import Core
import openvino.runtime as ov
import cv2 as cv
import numpy as np
import tensorflow as tf
OpenVINO 模型推理器(class)
class Predictor:
    """Wrapper around an OpenVINO model compiled for the CPU device."""

    def __init__(self, model_path):
        """Read the model at ``model_path`` and compile it for ``"CPU"``."""
        core = Core()
        network = core.read_model(model=model_path)
        self.compiled_model = core.compile_model(model=network, device_name="CPU")

    def get_inputs_name(self, num):
        """Return the compiled model's input at index ``num``."""
        compiled = self.compiled_model
        return compiled.input(num)

    def get_outputs_name(self, num):
        """Return the compiled model's output at index ``num``."""
        compiled = self.compiled_model
        return compiled.output(num)

    def predict(self, input_data):
        """Run the compiled model on ``input_data`` and return its results."""
        batch = [input_data]
        return self.compiled_model(batch)

    def get_request(self):
        """Create and return a new infer request on the compiled model."""
        compiled = self.compiled_model
        return compiled.create_infer_request()
图像预处理
def process_image(input_image, size):
    """Preprocess a BGR frame into the tensor layout fed to the model.

    The image is pasted into the top-left corner of a black square canvas,
    converted from BGR to RGB, resized to ``size`` x ``size``, transposed to
    channel-first order, scaled to [0, 1] and given a leading batch axis.

    Args:
        input_image (uint8 ndarray): Input image, height x width x 3.
        size (int): Side length of the model input.

    Returns:
        float32 ndarray: Array of shape (1, 3, size, size).
    """
    max_len = max(input_image.shape)
    img = np.zeros([max_len, max_len, 3], np.uint8)
    # Paste the frame into a square canvas so the resize keeps its aspect ratio.
    img[0:input_image.shape[0], 0:input_image.shape[1]] = input_image
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    # The third positional parameter of cv.resize is `dst`, not the
    # interpolation mode, so the mode has to be passed by keyword to apply.
    img = cv.resize(img, (size, size), interpolation=cv.INTER_NEAREST)
    img = np.transpose(img, [2, 0, 1])  # HWC -> CHW
    img = img / 255.0                   # scale to [0, 1]
    img = np.expand_dims(img, 0)        # add the batch axis
    return img.astype(np.float32)
图像后处理
def process_result(box_results, conf_results):
    """Filter raw model outputs by score and apply non-maximum suppression.

    The number of candidate boxes and the number of classes are taken from
    the tensor shapes rather than hard-coded.
    NOTE(review): assumes a batch of one; a larger batch would be flattened
    into a single candidate list -- confirm against the exported model.

    Args:
        box_results (float32 ndarray): Predicted boxes, reshapeable to (N, 4).
        conf_results (float32 ndarray): Class scores with axes
            (batch, classes, N); transposed to (batch, N, classes) here.

    Returns:
        ndarray: Kept boxes.
        ndarray: Score of each kept box.
        ndarray: Class index of each kept box.
        All three are empty arrays when no candidate scores above 0.5.
    """
    conf_results = np.transpose(conf_results, [0, 2, 1])
    num_classes = conf_results.shape[-1]
    box_results = box_results.reshape(-1, 4)
    conf_results = conf_results.reshape(-1, num_classes)

    # Best class and its score for every candidate box.
    best_scores = conf_results.max(axis=1)
    best_classes = conf_results.argmax(axis=1)

    # Drop low-confidence candidates before NMS.
    confident = best_scores > 0.5
    boxes = box_results[confident]
    scores = best_scores[confident]
    classes = best_classes[confident]

    if len(boxes) == 0:
        return np.array([]), np.array([]), np.array([])

    # Non-maximum suppression removes overlapping duplicates
    # (IoU threshold 0.25, score threshold 0.35).
    kept = tf.image.non_max_suppression(boxes, scores, len(scores), 0.25, 0.35).numpy()
    return boxes[kept], scores[kept], classes[kept]
画出预测框
def draw_box(image, boxes, scores, classes, labels):
    """Draw every prediction onto ``image`` and return it.

    Args:
        image (uint8 ndarray): Image to annotate; it is drawn on directly.
        boxes (float32 ndarray): One row of four corner coordinates per
            prediction, read as (x1, y1, x2, y2).
        scores (float32 ndarray): Score of each prediction.
        classes (int ndarray): Class index of each prediction.
        labels (list of str): Class names indexed by class index.

    Returns:
        uint8 ndarray: The annotated image.
    """
    palette = [(0, 0, 255), (0, 255, 0)]
    # Box coordinates are multiplied by longest-side / 640 before drawing.
    # NOTE(review): presumably the boxes are in 640-pixel model-input
    # coordinates -- confirm against the preprocessing size.
    factor = max(image.shape) / 640.0

    for idx, class_id in enumerate(classes):
        corners = boxes[idx, :]
        left, top, right, bottom = (int(corners[k] * factor) for k in range(4))
        color = palette[class_id]
        caption = ":".join((labels[class_id], str(scores[idx])))
        cv.rectangle(image, (left, top), (right, bottom), color, 2, cv.LINE_8)
        cv.putText(image, caption, (left, top - 10), cv.FONT_HERSHEY_SIMPLEX, 0.55, color, 2)
    return image
读取标签
def read_label(label_path):
    """Return the whitespace-separated tokens of the file at ``label_path``."""
    with open(label_path, 'r') as handle:
        content = handle.read()
    tokens = content.split()
    return tokens
同步推理
# Synchronous inference: grab a frame, run the model, draw, show, repeat.
label_path = "labels.txt"
yoloe_model_path = "ppyoloe_crn_s_80.xml"
predictor = Predictor(model_path = yoloe_model_path)
boxes_name = predictor.get_outputs_name(0)
conf_name = predictor.get_outputs_name(1)
labels = read_label(label_path=label_path)
cap = cv.VideoCapture(0)

# Create the window once. Flag 0 makes it resizable; the name must match the
# one passed to imshow.
cv.namedWindow("MaskDetection", 0)
cv.resizeWindow("MaskDetection", 640, 480)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        # No frame was delivered; stop instead of passing None downstream.
        break
    # Any positive flip code mirrors around the vertical axis; 1 is the
    # conventional value.
    frame = cv.flip(frame, 1)
    input_frame = process_image(frame, 640)
    results = predictor.predict(input_data=input_frame)
    boxes, scores, classes = process_result(box_results=results[boxes_name], conf_results=results[conf_name])
    result_frame = draw_box(image=frame, boxes=boxes, scores=scores, classes=classes, labels=labels)
    cv.imshow('MaskDetection', result_frame)
    key = cv.waitKey(1)
    if key == 27:  # Esc quits
        break
cap.release()
cv.destroyAllWindows()
异步推理
# Asynchronous inference: while the current request is being computed, the
# next frame is already captured and submitted on a second request.
label_path = "labels.txt"
yoloe_model_path = "ppyoloe_crn_s_80.xml"
predictor = Predictor(model_path = yoloe_model_path)
input_layer = predictor.get_inputs_name(0)
labels = read_label(label_path=label_path)
cap = cv.VideoCapture(0)
curr_request = predictor.get_request()
next_request = predictor.get_request()

# Create the window once. Flag 0 makes it resizable; the name must match the
# one passed to imshow.
cv.namedWindow("MaskDetection", 0)
cv.resizeWindow("MaskDetection", 640, 480)

# Prime the pipeline with the first frame, if the camera delivers one.
ret, frame = cap.read()
if ret:
    # Mirror the first frame like every later frame so the display is consistent.
    frame = cv.flip(frame, 1)
    curr_frame = process_image(frame, 640)
    curr_request.set_tensor(input_layer, ov.Tensor(curr_frame))
    curr_request.start_async()

while ret and cap.isOpened():
    ret, next_frame = cap.read()
    if not ret:
        # No frame was delivered; stop instead of passing None downstream.
        break
    next_frame = cv.flip(next_frame, 1)
    in_frame = process_image(next_frame, 640)
    next_request.set_tensor(input_layer, ov.Tensor(in_frame))
    next_request.start_async()
    # NOTE(review): wait_for(-1) is kept as written; confirm that a negative
    # timeout blocks until the request completes in the installed OpenVINO.
    if curr_request.wait_for(-1) == 1:
        box_output = curr_request.get_output_tensor(0).data
        conf_output = curr_request.get_output_tensor(1).data
        boxes, scores, classes = process_result(box_results=box_output, conf_results=conf_output)
        frame = draw_box(image=frame, boxes=boxes, scores=scores, classes=classes, labels=labels)
        cv.imshow('MaskDetection', frame)
        frame = next_frame
    # The request just submitted becomes the one to wait on next iteration.
    curr_request, next_request = next_request, curr_request
    key = cv.waitKey(1)
    if key == 27:  # Esc quits
        break
cap.release()
cv.destroyAllWindows()