在输入网络之前对图片的预处理代码

最新推荐文章于 2023-11-19 10:07:11 发布

Alphapeople

最新推荐文章于 2023-11-19 10:07:11 发布

阅读量563

点赞数 1

分类专栏：python、机器学习；文章标签：网络、python、计算机视觉

本文链接：https://blog.csdn.net/weixin_38241876/article/details/111408202

版权

python 同时被 2 个专栏收录

112 篇文章 1 订阅

订阅专栏

机器学习

81 篇文章 1 订阅

订阅专栏

from PIL import Image
import numpy as np

def letterbox_image(image, size):
    """Fit ``image`` inside a canvas of ``size`` without distorting it.

    The image is scaled by one common factor so that it fits entirely
    within the target, then pasted at the centre of a new gray canvas.

    Args:
        image: PIL image to resize; only ``.size`` and ``.resize`` are used.
        size: ``(width, height)`` of the output canvas.

    Returns:
        A new ``'RGB'`` PIL image of dimensions ``size`` whose uncovered
        border is filled with ``(128, 128, 128)``.
    """
    src_w, src_h = image.size
    dst_w, dst_h = size

    # One ratio for both axes preserves the aspect ratio; the smaller of
    # the two candidate ratios guarantees the result fits the canvas.
    ratio = min(dst_w/src_w, dst_h/src_h)
    fit_w, fit_h = int(src_w*ratio), int(src_h*ratio)

    resized = image.resize((fit_w, fit_h), Image.BICUBIC)
    canvas = Image.new('RGB', size, (128, 128, 128))
    # Split the leftover space evenly so the picture sits in the middle.
    offset = ((dst_w - fit_w)//2, (dst_h - fit_h)//2)
    canvas.paste(resized, offset)
    return canvas

def preprocess_image(image):
    """Divide pixel values by 255 and standardize each channel.

    Args:
        image: array-like whose last axis has three channels; it is cast
            to float32 before scaling.

    Returns:
        ``(image / 255 - mean) / std`` with the three-element mean/std
        broadcast along the last axis. The constants are float64, so the
        result is promoted to float64.
    """
    channel_mean = np.array([0.40789655, 0.44719303, 0.47026116])
    channel_std = np.array([0.2886383, 0.27408165, 0.27809834])

    scaled = np.float32(image) / 255.
    centered = scaled - channel_mean
    return centered / channel_std

def normalizeimage(imgpath,w,h):
    """Load an image file and convert it into a normalized network input.

    The image is letterboxed onto a ``w`` x ``h`` canvas with
    ``letterbox_image``, its channels are reversed (RGB -> BGR), it is
    normalized with ``preprocess_image`` and finally rearranged from
    height-width-channel to a single-item channel-first batch.

    Args:
        imgpath: path of the image file to open with PIL.
        w: target canvas width in pixels.
        h: target canvas height in pixels.

    Returns:
        Array of shape ``(1, 3, h, w)``. For square targets this is the
        same shape as before; for ``w != h`` the previous
        ``[1, 3, w, h]`` reshape reinterpreted the buffer and scrambled
        the pixel layout.
    """
    # The context manager releases the file handle; the letterboxed result
    # is a separate image object, so it stays usable after the file closes.
    with Image.open(imgpath) as image:
        crop_img = letterbox_image(image, [w, h])  # pad onto the w x h canvas
    # Convert RGB to BGR: the original centernet_hourglass weights were
    # trained on BGR images.
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    # Normalize, then move channels first. The array built from a PIL image
    # is (height, width, channel), so after the transpose it is (3, h, w).
    chw = np.transpose(preprocess_image(photo), (2, 0, 1))
    return np.reshape(chw, [1, 3, h, w])

# Demo: preprocess a sample image onto a 512x512 canvas and print the shape
# of the resulting array.
# NOTE(review): these statements run at import time and need the file
# 'img/street.jpg' to exist relative to the working directory.
imgpath = 'img/street.jpg'
print(normalizeimage(imgpath,512,512).shape)

class _ReweightedDataset(object):
    """Rotation-classification dataset built from several image folders.

    Every sample is an image rotated by ``cls_id`` quarter turns with
    ``np.rot90``; ``cls_id`` (0-3) is the target. In the ``'train'`` split
    the requested index is ignored: a source folder is drawn with
    probability proportional to its weight, then an image is drawn
    uniformly from that folder. In any other split each image is
    enumerated four times, once per rotation.
    """

    def __init__(self, res, img_dir, sources, weights, split):
        """Collect the image paths of every source folder.

        Args:
            res: output size handed to ``cv2.resize`` as ``dsize``
                (presumably ``(width, height)`` - confirm against callers).
            img_dir: root directory containing one sub-folder per source.
            sources: names of the sub-folders to scan recursively.
            weights: per-source sampling weights, used only when
                ``split == 'train'``.
            split: ``'train'`` enables random sampling and augmentation.
        """
        self.res = tuple(res)

        self.num_samples = []
        self.paths = []
        for src in sources:
            pattern = os.path.join(img_dir, src, '**')
            # Sorted so that the index -> path mapping is reproducible.
            paths = sorted(x for x in glob.glob(pattern, recursive=True) if is_image_file(x))
            self.num_samples.append(len(paths))
            self.paths += paths
        self.weights = weights
        self.split = split

        print('Loading dataset:', split, sources, self.num_samples)

    def __getitem__(self, index):
        """Return ``{'input': tensor, 'target': cls_id}`` for one sample.

        Raises:
            ValueError: if no source contains any image in the train split,
                or if the selected file cannot be decoded by ``cv2.imread``.
        """
        if self.split == 'train':
            cls_id = random.randint(0, 3)
            if not any(n > 0 for n in self.num_samples):
                raise ValueError('no images available for weighted sampling')
            # Give empty sources zero weight so they can never be selected;
            # otherwise random.randint(0, -1) below fails at random.
            usable = [wt if n > 0 else 0 for wt, n in zip(self.weights, self.num_samples)]
            cumsum = np.cumsum(usable)
            # Count how many normalized cumulative weights lie below a
            # uniform draw: that count is the index of the chosen source.
            which = int(np.sum(cumsum/cumsum[-1] < random.random()))
            # Paths are stored source after source, so skip the earlier ones.
            index = sum(self.num_samples[:which]) + random.randint(0, self.num_samples[which]-1)
        else:
            # Each image appears four times, once per rotation class.
            cls_id = index % 4
            index = index // 4

        path = self.paths[index]
        img = cv2.imread(path)
        if img is None:
            # imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside np.rot90.
            raise ValueError('cv2.imread could not read image: {}'.format(path))
        img = np.rot90(img, k=cls_id)

        if self.split == 'train':
            img = self.aug(img)
        img = self.transform(img)

        return {'input': img, 'target': cls_id}

    def __len__(self):
        """Return 10000 for the train split, else four entries per image."""
        # Training samples are drawn at random, so the length is a fixed
        # nominal value rather than the number of files.
        return 10000 if self.split == 'train' else sum(self.num_samples)*4

    def transform(self, img):
        """Resize to ``self.res``, convert to a tensor and normalize.

        Normalization uses mean 0.5 and std 0.5 on each of three channels.
        """
        op = transforms.Compose([
            lambda x: cv2.resize(x, self.res, interpolation=cv2.INTER_AREA),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ])
        return op(img)

    def aug(self, img):
        """Apply the training augmentations to ``img`` in random order.

        The pipeline contains a perspective transform, a crop, an optional
        (50%) Gaussian blur, linear contrast, additive Gaussian noise and a
        brightness multiply.
        """
        op = iaa.Sequential([
            iaa.PerspectiveTransform(scale=(0, 0.1), keep_size=False),
            iaa.Crop(percent=(0, 0.1), keep_size=False),
            iaa.Sometimes(
                0.5,
                iaa.GaussianBlur(sigma=(0, 0.5))
            ),
            iaa.LinearContrast((0.75, 1.5)),
            iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
            iaa.Multiply((0.8, 1.2), per_channel=0.2),
        ], random_order=True)
        return op(image=img)