1. Annotation files
Annotate the samples with labelme, which generates one JSON file per image, then use the following code (VocToBbox.py) to generate the txt label files. A minimal example of the JSON structure the script reads is shown first.
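A hand-trimmed example of a labelme JSON (real files carry extra keys such as imagePath and imageData, which the script ignores; the values here are made up for illustration):

{
  "imageWidth": 1920,
  "imageHeight": 1080,
  "shapes": [
    {"label": "car", "points": [[100.0, 200.0], [300.0, 400.0]]}
  ]
}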
# -*- coding:utf-8 -*-
import json
import os
import glob

rootdir = './2021-12-06/'  # directory holding your images and labelme JSON files
labeldir = './label/'      # directory the txt labels are written to

# label name -> YOLO class id
classes = {'pedestrian': 0, 'cyclist': 1, 'car': 2, 'bus': 3,
           'truck': 4, 'traffic_light': 5, 'traffic_sign': 6}

def position(pos):
    # find xmin, xmax, ymin, ymax of the annotated points, i.e. the bounding box
    x = [point[0] for point in pos]
    y = [point[1] for point in pos]
    return (float(min(x)), float(max(x)), float(min(y)), float(max(y)))

def convert(size, box):
    # convert (xmin, xmax, ymin, ymax) to normalized center coordinates and size (x, y, w, h);
    # the '-1' is inherited from the darknet VOC script and could be dropped for labelme's 0-based coordinates
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)

def convert_annotation(image_id):
    with open(rootdir + '%s.json' % image_id, 'r') as load_f:  # labelme JSON for this image
        load_dict = json.load(load_f)
    w = load_dict['imageWidth']   # original image size, used for normalization
    h = load_dict['imageHeight']
    with open(labeldir + '%s.txt' % image_id, 'w') as out_file:  # output label file
        for obj in load_dict['shapes']:
            label = obj['label']
            if label in classes:
                b = position(obj['points'])
                bb = convert((w, h), b)
                cls_id = classes[label]
                out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

def image_ids(rootdir):
    # collect the file-name stems of all JSON files under rootdir
    a = []
    paths = glob.glob(os.path.join(rootdir, '*.json'))
    paths.sort()
    for filename in paths:
        stem = os.path.splitext(os.path.basename(filename))[0]
        print(stem)
        a.append(stem)
    return a

names = image_ids(rootdir)
for name in names:
    print('image_id:', name)
    convert_annotation(name)
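As a quick sanity check of the conversion, take the example JSON above: position() returns (xmin, xmax, ymin, ymax) = (100.0, 300.0, 200.0, 400.0), and convert() normalizes it to x = ((100 + 300)/2 - 1)/1920 ≈ 0.103646, y = ((200 + 400)/2 - 1)/1080 ≈ 0.276852, w = 200/1920 ≈ 0.104167, h = 200/1080 ≈ 0.185185, so the line written for the car (class id 2) is roughly:

2 0.103646 0.276852 0.104167 0.185185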
If the numbers of sample images and JSON label files do not match, use the following code (buquan.py) to delete the images that have no label file.
#encoding:utf-8
import os
import glob

txt_dir = './json'     # directory holding the labelme JSON files
pic_dir = './img_jpg'  # directory holding the images

def stems(rootdir, ext):
    # collect the file-name stems of all files under rootdir with the given extension
    a = []
    paths = glob.glob(os.path.join(rootdir, '*' + ext))
    paths.sort()
    for filename in paths:
        stem = os.path.splitext(os.path.basename(filename))[0]
        print(stem)
        a.append(stem)
    return a

txt_set = set(stems(txt_dir, '.json'))
pic_set = set(stems(pic_dir, '.jpg'))

#comp = txt_set - pic_set
comp = pic_set - txt_set  # when there are more images than annotations, take the difference
print("ok")
print(len(comp))
for item in comp:  # delete the unannotated images
    file = pic_dir + '/' + item + '.jpg'
    if os.path.exists(file):
        os.remove(file)
        print(file)
#for item in comp:  # symmetric cleanup: delete JSON files with no matching image
#    file = txt_dir + '/' + item + '.json'
#    if os.path.exists(file):
#        os.remove(file)
#        print(file)
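After running buquan.py, a quick two-line check (assuming the same directory layout as above) confirms the two sets are back in sync:

import glob
print(len(glob.glob('./img_jpg/*.jpg')), len(glob.glob('./json/*.json')))  # the two counts should now be equal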
2. YOLO network training
Split the samples into train, trainval, test, and val with the following code (split_train_val.py):
# coding:utf-8
import os
import random
import argparse

parser = argparse.ArgumentParser()
# path to the label files; adjust for your data (xml labels usually live under Annotations)
parser.add_argument('--xml_path', default='images', type=str, help='input xml label path')
# where the split lists are written, usually ImageSets/Main under your dataset
parser.add_argument('--txt_path', default='./ImageSets/Main', type=str, help='output txt label path')
opt = parser.parse_args()

trainval_percent = 0.9  # fraction of all samples that go to trainval (the rest is test)
train_percent = 0.9     # fraction of trainval that goes to train (the rest is val)
xmlfilepath = opt.xml_path
txtsavepath = opt.txt_path
total_xml = os.listdir(xmlfilepath)
if not os.path.exists(txtsavepath):
    os.makedirs(txtsavepath)

num = len(total_xml)
list_index = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list_index, tv)
train = random.sample(trainval, tr)

file_trainval = open(txtsavepath + '/trainval.txt', 'w')
file_test = open(txtsavepath + '/test.txt', 'w')
file_train = open(txtsavepath + '/train.txt', 'w')
file_val = open(txtsavepath + '/val.txt', 'w')

for i in list_index:
    name = total_xml[i][:-4] + '\n'  # strip the 4-character extension
    if i in trainval:
        file_trainval.write(name)
        if i in train:
            file_train.write(name)
        else:
            file_val.write(name)
    else:
        file_test.write(name)

file_trainval.close()
file_train.close()
file_val.close()
file_test.close()
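With trainval_percent = 0.9 and train_percent = 0.9, roughly 81% of the samples land in train.txt, 9% in val.txt, and 10% in test.txt; for 1000 images that is about 810/90/100.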
Use the following code (voc_label.py) to generate test.txt, train.txt, and val.txt, which record the absolute paths of the sample images.
# -*- coding: utf-8 -*-
import os

sets = ['train', 'val', 'test']
abs_path = os.getcwd()
print(abs_path)

for image_set in sets:
    image_ids = open('./ImageSets/Main/%s.txt' % image_set).read().strip().split()
    list_file = open('./%s.txt' % image_set, 'w')
    for image_id in image_ids:
        list_file.write(abs_path + '/images/%s.jpg\n' % image_id)
    list_file.close()
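Each generated list file then contains one absolute image path per line, for example (assuming the script is run from /home/user/dataset):

/home/user/dataset/images/000001.jpg
/home/user/dataset/images/000002.jpg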
In train.py of the open-source yolov5 code, point the training at the sample lists generated above and train to obtain the weights; a sketch of a matching dataset yaml and training command is given below.
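yolov5 reads the dataset layout from a yaml file passed via --data. A minimal sketch matching the seven classes used here (the file name mydata.yaml is a made-up example):

# mydata.yaml
train: ./train.txt
val: ./val.txt
nc: 7
names: ['pedestrian', 'cyclist', 'car', 'bus', 'truck', 'traffic_light', 'traffic_sign']

A typical invocation would then be: python train.py --data mydata.yaml --weights yolov5s.pt --img 640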
3. Sending and receiving over ROS
The node subscribes to two image topics over ROS, runs yolov5 detection on one of them, projects the detected bboxes onto the other image topic, and publishes both processed images. The code (demo0.py) is as follows:
#! /usr/bin/env python3
from __future__ import print_function
# ---- ROS (python2-era) imports ----
import os
import roslib
import rospy
from std_msgs.msg import Header
from std_msgs.msg import String
from sensor_msgs.msg import Image
# from ros_numpy import msgify
#from cv_bridge import CvBridge
import sys
# drop the ROS python2 dist-packages so the python3 cv2 is imported instead
sys.path.remove('/opt/ros/melodic/lib/python2.7/dist-packages')
import threading
import time
import numpy as np
import cv2
# ---- YOLOv5 (python3) imports ----
import argparse
from pathlib import Path
import torch
import torch.backends.cudnn as cudnn

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import DetectMultiBackend
from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, time_sync
class SubscribeAndPublish:
    def __init__(self):
        self.all_obstacle_str = ''
        #self.sub1_name = "/cam_rgb/usb_cam/image_rect_color"
        self.sub1_name = "/pub_rgb"
        self.sub1 = rospy.Subscriber(self.sub1_name, Image, self.callback_rgb)
        self.sub2_name = "/pub_t"
        self.sub2 = rospy.Subscriber(self.sub2_name, Image, self.callback_t)
        self.pub1_name = "detect_rgb"
        self.pub1 = rospy.Publisher(self.pub1_name, Image, queue_size=1)
        self.pub2_name = "detect_t"
        self.pub2 = rospy.Publisher(self.pub2_name, Image, queue_size=1)
        self.timed = 0
        self.model = model    # model and device are created in __main__
        self.device = device
        self.stride = 32
        self.names = ['pedestrian', 'cyclist', 'car', 'bus', 'truck', 'traffic_light', 'traffic_sign']
        self.pt = True
        self.jit = False
        self.onnx = False
        self.engine = False
        self.img_rgb = []
        #self.mode = 0
        #self.path_rgb = './img/img_rgb'
        #self.path_t = './img/img_t'
        #self.path_info = './img/img_info'
        # self.bridge = CvBridge()
    def callback_rgb(self, data):
        print('callback1')
        # decode the raw Image message into an HxWx3 numpy array
        img_rgb = np.frombuffer(data.data, dtype=np.uint8).reshape(data.height, data.width, -1)
        img_rgb = img_rgb[:, :, ::-1]  # reverse channel order for OpenCV (incoming encoding assumed rgb8)
        self.img_rgb = img_rgb
        cv2.imwrite('./temp/rgb/rgb.jpg', img_rgb)

    def callback_t(self, data):
        print('callback2')
        time_run1 = time.time()
        timed = time.time()
        print('time:', timed - self.timed)  # interval since the previous frame
        self.timed = timed
        img_t = np.frombuffer(data.data, dtype=np.uint8).reshape(data.height, data.width, -1)
        img_t = img_t[:, :, ::-1]
        cv2.imwrite('./temp/t/t.jpg', img_t)
        print('save image successful!')
        #check_requirements(exclude=('tensorboard', 'thop'))
        img_t, img_rgb = self.run(**vars(opt))
        #cv2.imshow('i', img)
        #cv2.waitKey(1)
        #self.pub1.publish(CvBridge().cv2_to_imgmsg(img_rgb, "rgb8"))
        if len(img_t) > 0:
            print('send img')
            self.publish_image(self.pub2, img_t, 'base_link')
            self.publish_image(self.pub1, img_rgb, 'base_link')
        time_run2 = time.time()
        time_run = time_run2 - time_run1
        print('time_run:', time_run)
        print('')

    def publish_image(self, pub, data, frame_id='base_link'):
        assert len(data.shape) == 3, 'len(data.shape) must be equal to 3.'
        header = Header(stamp=rospy.Time.now())
        header.frame_id = frame_id
        msg = Image()
        msg.height = data.shape[0]
        msg.width = data.shape[1]
        msg.encoding = 'rgb8'
        msg.data = np.array(data).tobytes()
        msg.header = header
        msg.step = msg.width * 1 * 3
        pub.publish(msg)

    def get_time_stamp(self):
        # timestamp string like 20211206123456789, built from date + time + milliseconds
        ct = time.time()
        local_time = time.localtime(ct)
        data_head = time.strftime("%Y-%m-%d %H:%M:%S", local_time)
        data_secs = (ct - int(ct)) * 1000
        time_stamp = "%s.%03d" % (data_head, data_secs)
        stamp = ("".join(time_stamp.split()[0].split("-")) + "".join(time_stamp.split()[1].split(":"))).replace('.', '')
        return stamp
    @torch.no_grad()
    def run(self,
            weights=ROOT / 'yolov5s.pt',  # model.pt path(s) (unused here, the model is preloaded in __main__)
            source=ROOT / 'temp',  # file/dir/URL/glob, 0 for webcam
            imgsz=(640, 640),  # inference size (height, width)
            conf_thres=0.25,  # confidence threshold
            iou_thres=0.45,  # NMS IOU threshold
            max_det=1000,  # maximum detections per image
            device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
            view_img=False,  # show results
            save_txt=False,  # save results to *.txt
            save_conf=False,  # save confidences in --save-txt labels
            save_crop=False,  # save cropped prediction boxes
            nosave=False,  # do not save images/videos
            classes=None,  # filter by class: --class 0, or --class 0 2 3
            agnostic_nms=False,  # class-agnostic NMS
            augment=False,  # augmented inference
            visualize=False,  # visualize features
            update=False,  # update all models
            project=ROOT / 'runs/detect',  # save results to project/name
            name='exp',  # save results to project/name
            exist_ok=False,  # existing project/name ok, do not increment
            line_thickness=3,  # bounding box thickness (pixels)
            hide_labels=False,  # hide labels
            hide_conf=False,  # hide confidences
            half=False,  # use FP16 half-precision inference
            dnn=False,  # use OpenCV DNN for ONNX inference
            ):
        source = str(source)
        save_img = not nosave and not source.endswith('.txt')  # save inference images
        is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
        is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
        webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)

        # Directories (disabled: nothing is written to runs/detect here)
        #save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        #(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        imgsz = check_img_size(imgsz, s=self.stride)  # check image size

        # Dataloader: one directory for the detection source, one for the rgb frame saved by callback_rgb
        # (the webcam/stream branch from detect.py is removed; only image directories are read)
        dataset = LoadImages(source, img_size=imgsz, stride=self.stride, auto=self.pt)
        rgb_data = LoadImages('./temp/rgb', img_size=imgsz, stride=self.stride, auto=self.pt)
        bs = 1  # batch_size
        vid_path, vid_writer = [None] * bs, [None] * bs

        # Run inference
        #self.model.warmup(imgsz=(1, 3, *imgsz), half=half)  # warmup
        dt, seen = [0.0, 0.0, 0.0], 0
        for path, im, im0s, vid_cap, s in rgb_data:
            im = torch.from_numpy(im).to(self.device)
            im = im.half() if half else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim
            rgb = im
            rgb0s = im0s  # keep the rgb frame so the bboxes can be projected onto it
        for path, im, im0s, vid_cap, s in dataset:
            t1 = time_sync()
            im = torch.from_numpy(im).to(self.device)
            im = im.half() if half else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim
            t2 = time_sync()
            dt[0] += t2 - t1

            # Inference
            pred = self.model(im, augment=augment, visualize=visualize)
            t3 = time_sync()
            dt[1] += t3 - t2

            # NMS
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
            dt[2] += time_sync() - t3

            # Second-stage classifier (optional)
            # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

            # Process predictions
            for i, det in enumerate(pred):  # per image
                seen += 1
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
                rgb0 = rgb0s.copy()
                p = Path(p)  # to Path
                s += '%gx%g ' % im.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                imc = im0.copy() if save_crop else im0  # for save_crop
                rgbc = rgb0.copy() if save_crop else rgb0  # for save_crop
                annotator = Annotator(im0, line_width=line_thickness, example=str(self.names))
                annotator_rgb = Annotator(rgb0, line_width=line_thickness, example=str(self.names))
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        if save_txt:  # Write to file (requires re-enabling the save_dir block above, which defines txt_path)
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                            line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * len(line)).rstrip() % line + '\n')
                        if save_img or save_crop or view_img:  # Add bbox to both images
                            c = int(cls)  # integer class
                            label = None if hide_labels else (self.names[c] if hide_conf else f'{self.names[c]} {conf:.2f}')
                            annotator.box_label(xyxy, label, color=colors(c, True))
                            annotator_rgb.box_label(xyxy, label, color=colors(c, True))

                # Print time (inference-only)
                LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')

                # Return the two annotated images; only one image per callback, so return immediately
                im0 = annotator.result()
                rgb0 = annotator_rgb.result()
                return im0, rgb0
def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / './runs/train/exp2/weights/best.pt', help='model path(s)')
    parser.add_argument('--source', type=str, default=ROOT / 'temp/t', help='file/dir/URL/glob, 0 for webcam')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(FILE.stem, opt)
    return opt
def main(opt, model, device):
    rospy.init_node('biaoding_ws', anonymous=True)
    #####################
    t = SubscribeAndPublish()
    #####################
    rospy.spin()

if __name__ == "__main__":
    opt = parse_opt()
    device = ''
    weights = './runs/train/exp2/weights/best.pt'
    dnn = False
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn)
    half = False
    model.model.half() if half else model.model.float()
    model.warmup(imgsz=(1, 3, 640, 640), half=half)
    main(opt, model, device)
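To verify the pipeline end to end, a minimal viewer node can subscribe to the two published topics and display them. This is only a quick-check sketch (it decodes the raw rgb8 messages the same way demo0.py encodes them; the node name detect_viewer is made up):

#! /usr/bin/env python3
import numpy as np
import cv2
import rospy
from sensor_msgs.msg import Image

def make_cb(window):
    def cb(msg):
        # decode the raw rgb8 Image message exactly as demo0.py's publish_image encodes it
        img = np.frombuffer(msg.data, dtype=np.uint8).reshape(msg.height, msg.width, 3)
        cv2.imshow(window, img[:, :, ::-1])  # rgb8 -> BGR for OpenCV display
        cv2.waitKey(1)
    return cb

rospy.init_node('detect_viewer', anonymous=True)
rospy.Subscriber('detect_rgb', Image, make_cb('detect_rgb'))
rospy.Subscriber('detect_t', Image, make_cb('detect_t'))
rospy.spin()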