yolo中 LoadImages, LoadScreenshots, LoadStreams

该文介绍了一个名为LoadImages的类,用于读取本地图片、视频数据,支持自定义转换和视频帧步长。同时介绍了LoadScreenshots(用于获取本地屏幕截图)和LoadStreams(用于处理远程RTSP、RTMP和HTTP数据流)。
摘要由CSDN通过智能技术生成

LoadImages:

        功能: 读取本地图片、视频文件数据。

        参数说明:

                        path: 路径地址(图片文件夹,文件,视频文件夹,文件,或者两者混合都可以。)

                        img_size: resize,或者letterbox转换后的大小。

                        stride: letterbox转换参数

                        auto: letterbox 转换参数

                        transforms: 自定义transforms

                        vid_stride: 视频帧采样步长,每 vid_stride 帧读取一帧(默认为 1)

class LoadImages:
    """Iterate over local image and video files, yielding preprocessed frames.

    Sources are expanded once in ``__init__`` into a flat file list in which all
    images come first and all videos come last. Iterating the instance yields
    one tuple per image and one tuple per sampled video frame::

        (path, im, im0, cap, s)

    where ``im0`` is the frame as read by OpenCV, ``im`` is the preprocessed
    frame, ``cap`` is the current ``cv2.VideoCapture`` (``None`` when no video
    was found) and ``s`` is a short progress string.
    """

    def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
        """Collect the image/video files to read and open the first video, if any.

        Args:
            path: A glob pattern, directory, file, a list/tuple of those, or a
                ``*.txt`` file whose whitespace-separated entries are such sources.
            img_size: Target size forwarded to ``letterbox``.
            stride: ``stride`` argument forwarded to ``letterbox``.
            auto: ``auto`` argument forwarded to ``letterbox``.
            transforms: Optional callable applied to the raw frame instead of the
                default letterbox + CHW/RGB conversion.
            vid_stride: Number of frames grabbed per yielded video frame.

        Raises:
            FileNotFoundError: If a source is neither a glob, a directory nor a file.
            AssertionError: If no file with a supported extension was found.
        """
        if isinstance(path, str) and Path(path).suffix == ".txt":  # *.txt file with img/vid/dir on each line
            path = Path(path).read_text().rsplit()
        files = []
        # Pre-bind ``p`` so the "nothing found" assertion below can still format
        # its message when the source list is empty (empty list/tuple or empty
        # *.txt); otherwise the loop never runs and ``p`` would be undefined.
        p = path
        for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
            p = str(Path(p).resolve())
            if '*' in p:
                files.extend(sorted(glob.glob(p, recursive=True)))  # glob
            elif os.path.isdir(p):
                files.extend(sorted(glob.glob(os.path.join(p, '*.*'))))  # dir
            elif os.path.isfile(p):
                files.append(p)  # files
            else:
                raise FileNotFoundError(f'{p} does not exist')

        # Classify by lower-cased extension; files matching neither list are dropped.
        images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS]
        print(f'初始化。。。。。。。')
        ni, nv = len(images), len(videos)
        print(f'图片地址:{images} 长度:{ni}')
        print(f'视频地址:{videos}, 长度{nv}')
        self.img_size = img_size
        self.stride = stride
        self.files = images + videos  # images first, videos last (relied on by __next__)
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
        self.auto = auto
        self.transforms = transforms  # optional
        self.vid_stride = vid_stride  # video frame-rate stride
        self.count = 0  # index of the current file; lets next() work before iter()
        if any(videos):
            print(f'创建videos')
            self._new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}'
        print(f'初始化结束')

    def __iter__(self):
        """Reset the file index and return the instance itself as the iterator."""
        self.count = 0
        return self

    def __next__(self):
        """Return the next image, or the next sampled frame of the current video.

        Returns:
            tuple: ``(path, im, im0, self.cap, s)``.

        Raises:
            StopIteration: When every file has been consumed.
            AssertionError: If an image file cannot be decoded by ``cv2.imread``.
        """
        if self.count == self.nf:
            print(f'总共读取了: {self.count} 文件, 抛出错误结束')
            raise StopIteration

        path = self.files[self.count]
        print(f'读取第{self.count} 文件, {path}')
        if self.video_flag[self.count]:
            # Read video
            print(f'读取video, fps步长:{self.vid_stride}')
            self.mode = 'video'
            # Advance vid_stride frames but decode only the last one grabbed.
            for _ in range(self.vid_stride):
                self.cap.grab()
            ret_val, im0 = self.cap.retrieve()

            # A failed retrieve means the current video is exhausted: move on to
            # the next file. Videos are stored last, so the next file (if any)
            # is always another video.
            while not ret_val:
                print(f'视频文件读取结束: 索引加1{self.count}, 初始化下一视频文件')
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                path = self.files[self.count]
                self._new_video(path)

                ret_val, im0 = self.cap.read()

            self.frame += 1
            s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '

        else:
            # Read image
            self.count += 1
            im0 = cv2.imread(path)  # BGR
            assert im0 is not None, f'Image Not Found {path}'
            s = f'image {self.count}/{self.nf} {path}: '
        if self.transforms:
            im = self.transforms(im0)  # transforms
        else:
            im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0]  # padded resize
            im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
            im = np.ascontiguousarray(im)  # contiguous
        return path, im, im0, self.cap, s

    def _new_video(self, path):
        """Open ``path`` as the current video and reset the per-video frame counters."""
        # Create a new video capture object
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        # Number of frames that will be yielded given the sampling stride.
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
        self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META))  # rotation degrees

    def _cv2_rotate(self, im):
        """Rotate ``im`` according to ``self.orientation``; return it unchanged otherwise.

        Not called anywhere in this class.
        NOTE(review): the angle-to-rotation mapping below (0 -> 90 clockwise,
        180 -> 90 counter-clockwise, 90 -> 180) is kept exactly as found; confirm
        it is intended before relying on this helper.
        """
        # Rotate a cv2 video manually
        if self.orientation == 0:
            return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
        elif self.orientation == 180:
            return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE)
        elif self.orientation == 90:
            return cv2.rotate(im, cv2.ROTATE_180)
        return im

    def __len__(self):
        """Return the number of files (not the number of video frames)."""
        return self.nf  # number of files

返回数据:

                path : 文件地址

                im: 预处理后的图片(默认经 letterbox 缩放填充,并转换为 CHW、RGB 排列;若传入 transforms,则为 transforms 的输出)

                im0s: 原始图片

                vid_cap: 当前的 cv2.VideoCapture 对象(没有视频文件时为 None)

                s: 描述信息

# Demo: iterate a local folder with LoadImages and preview each preprocessed frame.
source = r'E:\yolodata'  # raw string: '\y' is not a valid escape sequence
imgsz = 640
stride = 32
pt = True
vid_stride = 1
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)

i = 0
for i, (path, im, im0s, vid_cap, s) in enumerate(dataset):
    # With no custom transforms, `im` is a CHW/RGB array, while cv2.imshow expects
    # HWC/BGR: flip the channel axis back to BGR and move it last before display.
    cv2.imshow('im', np.ascontiguousarray(im[::-1].transpose(1, 2, 0)))
    cv2.waitKey(0)  # 0 (the default) waits indefinitely for a key press
    cv2.destroyAllWindows()  # close every window

LoadScreenshots

功能: 获取本地屏幕截图。

使用:

python detect.py --source "screen 0 100 100 512 256"
# Demo: capture the screen with LoadScreenshots and preview every raw frame.
# The display size is queried through the Win32 user32 API (Windows only);
# GetSystemMetrics is called with indices 0 and 1, in that order.
user32 = ctypes.windll.user32
screensize = tuple(user32.GetSystemMetrics(metric) for metric in (0, 1))

# Source string in the "screen ..." form shown in the command-line example above.
source = f"screen 1 0 0 {screensize[0]} {screensize[1]}"
print(source)

dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)

for i, (path, im, im0s, vid_cap, s) in enumerate(dataset):
    print(path)
    cv2.imshow('im', im0s)  # show the unprocessed capture
    cv2.waitKey(0)  # 0 (the default) waits indefinitely for a key press
    cv2.destroyAllWindows()  # close every window

LoadStreams

功能:获取远程数据流 RTSP, RTMP, HTTP streams

python detect.py --source 'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值