(8-5-03)盲点检测:基于深度学习的疲劳驾驶和盲点检测系统

8.5.8  盲点检测模块:SSD目标检测训练框架

在下面的代码中,使用PyTorch实现SSD(Single Shot MultiBox Detector)目标检测训练框架,支持多种网络架构(如VGG16、MobileNetV1/V2、SqueezeNet等)。通过命令行参数配置数据集类型、网络结构、学习率等超参数,可以进行模型训练和验证。代码加载指定数据集,创建相应的数据转换和数据加载器,构建目标检测网络,定义损失函数和优化器,并支持从已有检查点恢复训练。训练过程中实现了学习率调度和定期验证,每次验证后都会保存一个模型检查点,文件名中包含当前epoch和验证损失,便于事后挑选验证损失最低的模型。

if __name__ == '__main__':
    # Entry point of the SSD training script. Using the parsed command-line
    # arguments (`args`), it builds the datasets, the network, the optimizer
    # and the learning-rate scheduler, then trains and periodically validates,
    # saving a checkpoint after every validation pass.
    timer = Timer()  # used below to measure how long model loading takes

    logging.info(args)  # log the parsed command-line arguments

    # Make sure the checkpoint output directory exists.
    if args.checkpoint_folder:
        args.checkpoint_folder = os.path.expanduser(args.checkpoint_folder)
        # makedirs(exist_ok=True) also creates missing parent directories and
        # does not fail if the directory already exists.
        os.makedirs(args.checkpoint_folder, exist_ok=True)

    # Select the network factory and its configuration module.
    if args.net == 'vgg16-ssd':
        create_net = create_vgg_ssd
        config = vgg_ssd_config
    elif args.net == 'mb1-ssd':
        create_net = create_mobilenetv1_ssd
        config = mobilenetv1_ssd_config
    elif args.net == 'mb1-ssd-lite':
        create_net = create_mobilenetv1_ssd_lite
        config = mobilenetv1_ssd_config
    elif args.net == 'sq-ssd-lite':
        create_net = create_squeezenet_ssd_lite
        config = squeezenet_ssd_config
    elif args.net == 'mb2-ssd-lite':
        create_net = lambda num: create_mobilenetv2_ssd_lite(num, width_mult=args.mb2_width_mult)
        # NOTE(review): the MobileNetV2 variant reuses the MobileNetV1 config
        # here - confirm this is intended.
        config = mobilenetv1_ssd_config
    else:
        logging.fatal("The net type is wrong.")
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Data transforms: augmentation for training, a plain transform for
    # validation, and a target transform that matches boxes to the priors.
    train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std)
    # The trailing 0.5 is presumably the IoU matching threshold - confirm
    # against the MatchPrior signature.
    target_transform = MatchPrior(config.priors, config.center_variance,
                                  config.size_variance, 0.5)

    test_transform = TestTransform(config.image_size, config.image_mean, config.image_std)

    # Load the training datasets (there may be several paths).
    logging.info("准备训练数据集。")
    datasets = []
    for dataset_path in args.datasets:
        if args.dataset_type == 'voc':
            dataset = VOCDataset(dataset_path, transform=train_transform,
                                 target_transform=target_transform)
            label_file = os.path.join(args.checkpoint_folder, "labels.txt")
            store_labels(label_file, dataset.class_names)
            num_classes = len(dataset.class_names)
        elif args.dataset_type == 'open_images':
            dataset = OpenImagesDataset(dataset_path,
                 transform=train_transform, target_transform=target_transform,
                 dataset_type="train", balance_data=args.balance_data)
            label_file = os.path.join(args.checkpoint_folder, "labels.txt")
            store_labels(label_file, dataset.class_names)
            logging.info(dataset)
            num_classes = len(dataset.class_names)

        else:
            raise ValueError(f"Dataset type {args.dataset_type} is not supported.")
        datasets.append(dataset)

    # Build the combined training dataset and its loader.
    # label_file / num_classes hold the values from the last dataset processed.
    logging.info(f"标签已存入文件 {label_file}。")
    train_dataset = ConcatDataset(datasets)
    logging.info("训练数据集大小: {}".format(len(train_dataset)))
    train_loader = DataLoader(train_dataset, args.batch_size,
                              num_workers=args.num_workers,
                              shuffle=True)

    # Build the validation dataset. It is read from `dataset_path`, i.e. the
    # last path iterated in the training loop above.
    logging.info("准备验证数据集。")
    if args.dataset_type == "voc":
        val_dataset = VOCDataset(dataset_path, transform=test_transform,
                                 target_transform=target_transform, is_test=True)
    elif args.dataset_type == 'open_images':
        val_dataset = OpenImagesDataset(dataset_path,
                                        transform=test_transform, target_transform=target_transform,
                                        dataset_type="test")
        logging.info(val_dataset)
    logging.info("验证数据集大小: {}".format(len(val_dataset)))

    val_loader = DataLoader(val_dataset, args.batch_size,
                            num_workers=args.num_workers,
                            shuffle=False)

    # Create the network.
    logging.info("构建网络。")
    net = create_net(num_classes)
    min_loss = -10000.0  # not referenced again in this block
    last_epoch = -1

    # Per-group learning rates fall back to the global --lr when not given.
    base_net_lr = args.base_net_lr if args.base_net_lr is not None else args.lr
    extra_layers_lr = args.extra_layers_lr if args.extra_layers_lr is not None else args.lr

    # Freeze layers if requested and collect the parameter groups to optimize.
    # Groups without an explicit 'lr' use the optimizer's default lr.
    if args.freeze_base_net:
        logging.info("冻结基础网络。")
        freeze_net_layers(net.base_net)
        params = [
            {'params': itertools.chain(
                net.source_layer_add_ons.parameters(),
                net.extras.parameters()
            ), 'lr': extra_layers_lr},
            {'params': itertools.chain(
                net.regression_headers.parameters(),
                net.classification_headers.parameters()
            )}
        ]
    elif args.freeze_net:
        freeze_net_layers(net.base_net)
        freeze_net_layers(net.source_layer_add_ons)
        freeze_net_layers(net.extras)
        params = itertools.chain(net.regression_headers.parameters(), net.classification_headers.parameters())
        logging.info("冻结除预测头之外的所有层。")
    else:
        params = [
            {'params': net.base_net.parameters(), 'lr': base_net_lr},
            {'params': itertools.chain(
                net.source_layer_add_ons.parameters(),
                net.extras.parameters()
            ), 'lr': extra_layers_lr},
            {'params': itertools.chain(
                net.regression_headers.parameters(),
                net.classification_headers.parameters()
            )}
        ]

    # Load previous weights if requested: full checkpoint, base network only,
    # or a pretrained SSD (first matching option wins).
    timer.start("加载模型")
    if args.resume:
        logging.info(f"从模型 {args.resume} 恢复")
        net.load(args.resume)
    elif args.base_net:
        logging.info(f"从基础网络 {args.base_net} 初始化")
        net.init_from_base_net(args.base_net)
    elif args.pretrained_ssd:
        logging.info(f"从预训练的 SSD {args.pretrained_ssd} 初始化")
        net.init_from_pretrained_ssd(args.pretrained_ssd)
    logging.info(f'加载模型耗时: {timer.end("加载模型"):.2f} 秒。')

    # Move the model to the training device.
    net.to(DEVICE)

    # Loss function and optimizer.
    criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3,
                             center_variance=0.1, size_variance=0.2, device=DEVICE)
    optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum,
                                weight_decay=args.weight_decay)
    logging.info(f"学习率: {args.lr}, 基础网络学习率: {base_net_lr}, "
                 + f"额外层学习率: {extra_layers_lr}.")

    # Learning-rate schedule.
    if args.scheduler == 'multi-step':
        logging.info("使用 MultiStepLR 调度器。")
        milestones = [int(v.strip()) for v in args.milestones.split(",")]
        scheduler = MultiStepLR(optimizer, milestones=milestones,
                                gamma=0.1, last_epoch=last_epoch)
    elif args.scheduler == 'cosine':
        logging.info("使用 CosineAnnealingLR 调度器。")
        scheduler = CosineAnnealingLR(optimizer, args.t_max, last_epoch=last_epoch)
    else:
        logging.fatal(f"不支持的调度器: {args.scheduler}。")
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Train for the requested number of epochs.
    logging.info(f"从 epoch {last_epoch + 1} 开始训练。")

    for epoch in range(last_epoch + 1, args.num_epochs):
        train(train_loader, net, criterion, optimizer,
              device=DEVICE, debug_steps=args.debug_steps, epoch=epoch)
        # Step the scheduler after the epoch's optimizer updates. Since
        # PyTorch 1.1, stepping it first skips the first value of the schedule.
        scheduler.step()

        # Validate every `validation_epochs` epochs and on the final epoch;
        # a checkpoint is written after each validation.
        if epoch % args.validation_epochs == 0 or epoch == args.num_epochs - 1:
            val_loss, val_regression_loss, val_classification_loss = test(val_loader, net, criterion, DEVICE)
            logging.info(
                f"Epoch: {epoch}, " +
                f"验证损失: {val_loss:.4f}, " +
                f"验证回归损失 {val_regression_loss:.4f}, " +
                f"验证分类损失: {val_classification_loss:.4f}"
            )
            model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{val_loss}.pth")
            net.save(model_path)
            logging.info(f"保存模型 {model_path}")

    logging.info("任务完成,退出程序。")

通过如下命令运行SSD模型:

python3 train_ssd.py --dataset-type=voc --data=data/images/ --model-dir=models/ --batch-size=4 --epochs=10

然后通过如下命令将训练得到的.pth模型转换为.onnx模型:

python3 ssd_onnx_export.py --model-dir=models/

8.5.9  盲点检测模块:运行检测功能

编写文件run.py,执行疲劳驾驶检测和盲点检测功能。通过使用视觉处理技术,包括Mediapipe、OpenCV、Jetson Inference等库,对驾驶员的疲劳状态和盲点进行实时监测。系统通过摄像头捕捉视频流,利用人脸检测、眼睛检测以及深度学习模型推理,实时判断驾驶员是否疲劳以及车辆是否存在盲点,并通过GPIO控制LED和蜂鸣器进行实时报警。文件run.py的具体实现流程如下所示。

(1)首先进行GPIO引脚的设置工作,包括将蜂鸣器和两个LED的引脚配置为输出并初始化为高电平。然后设置OpenCV的文字字体,并使用ONNX Runtime加载眼睛状态分类模型。接着,通过Jetson Inference库加载目标检测模型(SSD-Mobilenet),打开用于发送检测结果的串口(连接Arduino设备),设置眼睛分类模型的输入图像大小,并创建Mediapipe的人脸检测器。接下来,通过Haarcascade级联分类器加载眼睛检测器,并初始化一些状态和计数器。最后,启动两个视频流,供后面的疲劳驾驶检测和盲点检测使用。

# ---- GPIO outputs (BOARD pin numbering) ----
GPIO.setmode(GPIO.BOARD)
GPIO.setwarnings(False)
channel_buzzer, channel_led, channel_led1 = 11, 18, 21
# Configure the buzzer and both LED pins as outputs, then drive each HIGH.
for _pin in (channel_buzzer, channel_led, channel_led1):
    GPIO.setup(_pin, GPIO.OUT)
for _pin in (channel_buzzer, channel_led, channel_led1):
    GPIO.output(_pin, GPIO.HIGH)

# Font used for all on-frame text.
font = cv2.FONT_HERSHEY_SIMPLEX

# ---- Models ----
# ONNX Runtime session for the CNN model (CUDA execution provider only).
sess = onnxruntime.InferenceSession(
    "models/cnn/model_cnn.onnx",
    providers=["CUDAExecutionProvider"],
)
# SSD-Mobilenet object detector loaded through jetson.inference.
net = jetson.inference.detectNet(
    argv=[
        '--model=models/ssd/ssd-mobilenet.onnx',
        '--labels=models/ssd/labels.txt',
        '--input-blob=input_0',
        '--output-cvg=scores',
        '--output-bbox=boxes',
    ],
    threshold=0.5,
)

# Serial link that receives the detected class names.
ser = serial.Serial('/dev/ttyACM0', baudrate=9600, timeout=0.1)

# Side length (pixels) that eye crops are resized to before inference.
image_width = image_height = 24

# Mediapipe face detector.
mp_face_detection = mp.solutions.face_detection
face_detection = mp_face_detection.FaceDetection()

# Haar cascades for the eyes.
# NOTE(review): the "r_" cascade loads the left-eye file and the "l_" cascade
# the right-eye file - possibly deliberate for a mirrored image; confirm.
r_eye_cascade = cv2.CascadeClassifier("haarcascades/haarcascade_lefteye_2splits.xml")
l_eye_cascade = cv2.CascadeClassifier("haarcascades/haarcascade_righteye_2splits.xml")

# ---- Mutable state shared with the detection functions ----
labels = ' '              # status text drawn at the top of the frame
l_val, r_val = [99], [99]  # latest model outputs for each eye
e_val, e_cnt = 0, 0        # averaged eye value and eye counter
f_pos, f_cnt = 0, 0        # face x-centre and face counter

# ---- Camera streams ----
stream1 = VideoGear(source=0, logging=True).start()
stream2 = VideoGear(source=1, logging=True).start()

(2)定义函数drowsiness_detection(frame),功能是进行疲劳驾驶检测,包括人脸和眼睛的检测,计算眼睛的状态指标,根据情况触发报警。在检测时将输入帧转换为灰度图,使用Mediapipe检测人脸,Haarcascade检测眼睛,通过ONNX Runtime评估眼睛状态指标。根据人脸和眼睛的存在情况触发相应的疲劳驾驶警报。

def _eye_state_output(gray, box):
    """Run the eye-state model on one eye region and return the raw session output.

    Args:
        gray: Grayscale copy of the frame, made before anything is drawn on it.
        box: (x, y, w, h) rectangle of the eye as returned by the cascade.

    Returns:
        The list returned by ``sess.run`` for the "dense_1" output.
    """
    x, y, w, h = box
    # Crop from the clean grayscale image so that rectangles already drawn on
    # the colour frame never end up in the classifier input.
    eye = cv2.resize(gray[y:y+h, x:x+w], (image_width, image_height))
    eye = (eye / 255).astype("float32")
    # Add the batch axis and a trailing channel axis.
    eye = eye.reshape(1, image_width, image_height, -1)
    return sess.run(["dense_1"], {"input": eye})


def drowsiness_detection(frame):
    """Detect face and eyes in ``frame``, update the alert state and annotate the frame.

    The frame is modified in place (boxes, status text, counters). The buzzer
    and LED are driven through GPIO, and the module-level state (labels,
    l_val, r_val, e_val, e_cnt, f_pos, f_cnt) is updated.

    Args:
        frame: BGR image (it is converted with COLOR_BGR2GRAY / COLOR_BGR2RGB).
    """
    global labels, l_val, r_val, e_val, e_cnt, f_pos, f_cnt

    # Grayscale copy taken before any drawing; used for eye detection and crops.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Face detection (Mediapipe takes an RGB image).
    results = face_detection.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    face_exist = bool(results.detections)
    xcenter = None
    if face_exist:
        ih, iw = frame.shape[:2]
        for detection in results.detections:
            bboxC = detection.location_data.relative_bounding_box
            # Relative box -> pixel box (x, y, width, height).
            bbox = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih)
            # With several faces, the centre of the last one is kept.
            xcenter = int(bboxC.xmin * iw + (bboxC.width * iw)/2)
            cv2.rectangle(frame, bbox, (0, 0, 255), 2)

    # Eye detection on the grayscale image.
    left_eye = l_eye_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5, minSize=(30, 30))
    right_eye = r_eye_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5, minSize=(30, 30))

    # Classify every detected eye; the value of the last one per side is kept.
    for (x, y, w, h) in left_eye:
        l_val = _eye_state_output(gray, (x, y, w, h))
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

    for (x, y, w, h) in right_eye:
        r_val = _eye_state_output(gray, (x, y, w, h))
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

    eyes_exist = len(left_eye) > 0 and len(right_eye) > 0

    # Average of the two eye outputs. If a side was not detected in this
    # frame, its previous value is reused.
    e_val = ((l_val[0] + r_val[0])/2)
    f_pos = xcenter

    # ---- Alert logic ----
    # Case 1: face and both eyes visible.
    if face_exist and eyes_exist:
        GPIO.output(channel_led, GPIO.LOW)
        f_cnt = 0
        # NOTE(review): values below 0.7 are treated as "eyes closed" -
        # confirm against the model's output definition.
        if e_val < 0.7:
            e_cnt += 1
            if e_cnt > 7:
                e_cnt = 7  # clamp the counter while the alert is active
                cv2.putText(frame, 'ALERT!!!', (50, 100), font, 2, (0, 0, 255), 2, cv2.LINE_AA)
                GPIO.output(channel_buzzer, GPIO.LOW)
                time.sleep(0.05)
                labels = 'Mat nham - Canh bao ngu ngat'
        else:
            # Open eyes drain the counter faster than closed eyes fill it.
            e_cnt -= 5
            if e_cnt < 0:
                e_cnt = 0
                GPIO.output(channel_buzzer, GPIO.HIGH)
                time.sleep(0.05)
                labels = 'Mat mo - Binh Thuong'

    # Case 2: face visible but not both eyes.
    elif face_exist and not eyes_exist:
        e_cnt = 0
        if 200 < f_pos < 400:
            # Face centre inside the 200-400 px band.
            f_cnt += 3
            if f_cnt > 10:
                f_cnt = 10
                cv2.putText(frame, 'ALERT!!!', (50, 100), font, 2, (0, 0, 255), 2, cv2.LINE_AA)
                GPIO.output(channel_buzzer, GPIO.LOW)
                time.sleep(0.05)
                labels = 'Khong phat hien mat - Canh bao ngu gat'
        else:
            # Face centre outside that band.
            f_cnt += 2
            if f_cnt > 20:
                f_cnt = 20
                cv2.putText(frame, 'ALERT!!!', (50, 100), font, 2, (0, 0, 255), 2, cv2.LINE_AA)
                GPIO.output(channel_buzzer, GPIO.LOW)
                time.sleep(0.05)
                labels = 'Khong phat hien mat - Canh bao mat tap trung'

    # Case 3: neither face nor eyes - switch outputs off and reset state.
    elif not face_exist and not eyes_exist:
        GPIO.output(channel_buzzer, GPIO.HIGH)
        GPIO.output(channel_led, GPIO.HIGH)
        time.sleep(0.05)
        labels = ' '
        e_cnt = 0
        f_cnt = 0

    # The remaining combination (eyes but no face) leaves outputs and
    # counters unchanged.

    # Status bars and text (coordinates are fixed for a 640x480 layout).
    cv2.rectangle(frame, (0, 0), (640, 50), (0, 0, 0), thickness=-1)
    cv2.rectangle(frame, (0, 430), (640, 480), (0, 0, 0), thickness=-1)
    cv2.putText(frame, labels, (30, 30), font, 0.75, (255, 255, 255), 2, cv2.LINE_AA)
    cv2.putText(frame, 'Eye counter: ' + str(e_cnt), (30, 460), font, 0.75, (255, 255, 255), 2, cv2.LINE_AA)
    cv2.putText(frame, 'Face counter: ' + str(f_cnt), (400, 460), font, 0.75, (255, 255, 255), 2, cv2.LINE_AA)

(3)定义函数blindspot_detection(frame)实现盲点检测功能,使用库Jetson Inference的SSD-Mobilenet模型检测帧中的目标,并通过GPIO控制LED指示器进行警报。在检测时将输入帧传递给Jetson Inference的目标检测模型,检测目标并通过GPIO控制LED指示器触发盲点检测的报警。根据检测到的目标类别发送相应的信息到串口。

def blindspot_detection(frame):
    """Detect objects in ``frame``, blink the blind-spot LED and report classes over serial.

    The frame is annotated in place with a box and a "class confidence" label
    for every detection. For the classes "nguoi", "xemay" and "oto", one line
    ('person', 'motorbike' or 'car') is written to the serial port per detection.

    Args:
        frame: Image from the video stream, handled as OpenCV BGR.
    """
    # Class name reported by the detector -> line sent over serial.
    serial_messages = {
        "nguoi": b'person\n',
        "xemay": b'motorbike\n',
        "oto": b'car\n',
    }

    # detectNet works on RGB images, while OpenCV frames are BGR; convert a
    # copy for inference and keep drawing on the original BGR frame.
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_cuda = jetson.utils.cudaFromNumpy(rgb)
    detections = net.Detect(img_cuda)

    if not detections:
        GPIO.output(channel_led1, GPIO.HIGH)
        time.sleep(0.05)
        return

    # Pulse the LED: LOW for 50 ms, then back to HIGH.
    GPIO.output(channel_led1, GPIO.LOW)
    time.sleep(0.05)
    GPIO.output(channel_led1, GPIO.HIGH)

    for d in detections:
        x1, y1, x2, y2 = int(d.Left), int(d.Top), int(d.Right), int(d.Bottom)
        class_name = net.GetClassDesc(d.ClassID)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(frame, class_name + " " + str(round(d.Confidence, 3)), (x1+5, y1+25), font, 0.75, (255, 0, 0), 2)
        message = serial_messages.get(class_name)
        if message is not None:
            ser.write(message)

(4)通过如下代码实现一个双摄像头检测系统,通过两个并行的视频流进行实时的疲劳驾驶检测和盲点检测。其中,drowsiness_detection函数负责检测驾驶员的疲劳状态,通过人脸和眼睛检测,根据状态触发报警;blindspot_detection函数则用于盲点检测,通过Jetson Inference库的SSD-Mobilenet模型检测目标,并触发相应的报警。整个系统在一个无限循环中运行,通过串口通信发送警报信息,同时实时显示两个摄像头的合并视频流,并通过按键输入退出系统。

# Main loop: read one frame from each camera, run both detectors, and show
# the two annotated frames side by side until a stream ends or 'q' is pressed.
try:
    while True:
        frameA = stream1.read()
        frameB = stream2.read()

        # A None frame means a stream has ended. This must be checked before
        # the detectors run, because they cannot process a None frame.
        if frameA is None or frameB is None:
            break

        drowsiness_detection(frameA)
        blindspot_detection(frameB)

        # Show both annotated frames in a single window.
        frame = np.hstack((frameA, frameB))
        cv2.imshow("Output", frame)

        # Quit on 'q'.
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
finally:
    # Release hardware and windows on every exit path (end of stream, 'q',
    # or an exception), not only on a key press.
    GPIO.cleanup()
    cv2.destroyAllWindows()
    # Safely close both video streams.
    stream1.stop()
    stream2.stop()

本项目已完结:

(8-5-01)盲点检测:基于深度学习的疲劳驾驶和盲点检测系统-CSDN博客

(8-5-02)盲点检测:基于深度学习的疲劳驾驶和盲点检测系统-CSDN博客

  • 21
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

码农三叔

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值