用 YOLO + VGG16 孪生网络对比图片

最新推荐文章于 2024-04-23 14:35:36 发布
齐整。
最新推荐文章于 2024-04-23 14:35:36 发布
阅读量133
点赞数
文章标签： YOLO python 开发语言
本文链接：https://blog.csdn.net/weixin_43830600/article/details/134117535
版权
# -*- coding: utf-8 -*-
# @Author: b01e
# torch                   1.12.0
# torchvision             0.13.0
import time

import cv2
import numpy as np
import onnxruntime
import torch.nn as nn
import torch
import torch.utils
from PIL import Image
import os
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.models import VGG16_Weights

# All tensors and models in this script are placed on the CPU.
devices = torch.device('cpu')

# VGG16 built with torchvision's default weights; the Siamese class reuses its `features` module.
nets = models.vgg16(weights=VGG16_Weights.DEFAULT)


class Siamese(nn.Module):
    """Siamese comparison head on top of the shared VGG16 feature extractor.

    Both inputs are passed through the same convolutional stack; the
    element-wise absolute difference of the flattened feature maps is fed
    through two linear layers and a sigmoid, giving one score per pair.
    """

    def __init__(self):
        super().__init__()
        # NOTE: despite its name, this attribute holds the VGG16 `features`
        # module taken from the module-level `nets`. The attribute name is
        # left untouched because a saved model may refer to it.
        backbone = nets.features.eval()
        backbone.to(devices)
        self.resnet = backbone
        # 512 channels x 3 x 3 spatial positions per flattened feature map
        # (presumably sized for 105x105 inputs -- TODO confirm).
        self.fully_connect1 = nn.Linear(512 * 3 * 3, 512)
        self.fully_connect2 = nn.Linear(512, 1)
        self.sgm = nn.Sigmoid()

    def forward(self, x1, x2):
        """Return the sigmoid score for the batch pair ``(x1, x2)``."""
        feat_a = torch.flatten(self.resnet(x1), 1)
        feat_b = torch.flatten(self.resnet(x2), 1)
        distance = torch.abs(feat_a - feat_b)
        hidden = self.fully_connect1(distance)
        return self.sgm(self.fully_connect2(hidden))


# Load the trained comparison model from ./bj.pth onto the CPU (presumably a
# pickled Siamese instance, since it is later called with two image batches).
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
we_nets = torch.load('./bj.pth', map_location='cpu')
we_nets.to(devices)

# Preprocessing applied to every crop: resize to 105x105, then convert to a tensor.
transforms_list = transforms.Compose([transforms.Resize((105, 105)), transforms.ToTensor()])


def getdata(p1, p2):
    """Return the comparison score the loaded model gives to two images.

    Args:
        p1: First image, in a form accepted by ``transforms_list``
            (the script passes RGB PIL images).
        p2: Second image, same form as ``p1``.

    Returns:
        The model output at position ``[0, 0]`` as a Python float.
    """
    batch_a = transforms_list(p1).to(devices).unsqueeze(0)
    batch_b = transforms_list(p2).to(devices).unsqueeze(0)
    # Pure inference: disable autograd so no graph is built for each comparison.
    with torch.no_grad():
        h = we_nets(batch_a, batch_b)
    return h[0, 0].item()


class YOLOV5():
    """Wrapper around an ONNX Runtime session for the ``best1.onnx`` detector."""

    def __init__(self):
        # The model file is expected next to this source file.
        model_path = os.path.join(os.path.dirname(__file__), 'best1.onnx')
        self.onnx_session = onnxruntime.InferenceSession(model_path)
        self.input_name = self.get_input_name()
        self.output_name = self.get_output_name()

    def get_input_name(self):
        """Return the names of all session inputs, in session order."""
        return [node.name for node in self.onnx_session.get_inputs()]

    def get_output_name(self):
        """Return the names of all session outputs, in session order."""
        return [node.name for node in self.onnx_session.get_outputs()]

    def get_input_feed(self, img_tensor):
        """Return a feed dict mapping every input name to ``img_tensor``."""
        return {name: img_tensor for name in self.input_name}

    def inference(self, image_bytes):
        """Run the detector on an encoded image.

        Args:
            image_bytes: Raw bytes of an encoded image (e.g. JPEG/PNG).

        Returns:
            ``(pred, x, y)``: the first session output, followed by the
            original image width and height in pixels.

        Raises:
            ValueError: If ``image_bytes`` cannot be decoded as an image.
        """
        img = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            # imdecode signals failure by returning None rather than raising.
            raise ValueError("image_bytes could not be decoded as an image")
        y, x = img.shape[:2]
        or_img = cv2.resize(img, (320, 320))
        # BGR -> RGB and HWC -> CHW, then a contiguous float32 copy scaled to [0, 1].
        img = or_img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0
        img = np.expand_dims(img, axis=0)  # add the batch dimension
        input_feed = self.get_input_feed(img)
        pred = self.onnx_session.run(None, input_feed)[0]
        return pred, x, y


def pynms(dets, thresh):
    """Greedy non-maximum suppression.

    Args:
        dets: 2-D array whose columns 0-3 are ``x1, y1, x2, y2`` and whose
            column 4 is the score.
        thresh: Boxes whose IoU with an already selected box exceeds this
            value are dropped.

    Returns:
        List of row indices into ``dets`` that survive, highest score first.
    """
    left, top, right, bottom = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    conf = dets[:, 4]
    # The "+ 1" treats coordinates as inclusive pixel indices.
    box_area = (bottom - top + 1) * (right - left + 1)

    remaining = np.argsort(conf)[::-1]  # candidate indices, best score first
    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection rectangle between the best box and every other candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        # Disjoint boxes produce negative extents; clamp them to zero.
        inter_w = np.maximum(0, inter_right - inter_left + 1)
        inter_h = np.maximum(0, inter_bottom - inter_top + 1)
        inter_area = inter_w * inter_h

        iou = inter_area / (box_area[best] + box_area[rest] - inter_area)

        # Only candidates that do not overlap too much go to the next round.
        remaining = rest[iou <= thresh]

    return selected


def xywh2xyxy(x):
    """Convert ``[cx, cy, w, h]`` rows to ``[x1, y1, x2, y2]`` corner form.

    Only the first four columns are rewritten; any further columns are
    copied through unchanged. The input array is not modified.
    """
    center_x, center_y = x[:, 0], x[:, 1]
    half_w, half_h = x[:, 2] / 2, x[:, 3] / 2

    corners = np.copy(x)
    corners[:, 0] = center_x - half_w
    corners[:, 1] = center_y - half_h
    corners[:, 2] = center_x + half_w
    corners[:, 3] = center_y + half_h
    return corners


def filter_box(org_box, conf_thres=0.5, iou_thres=0.5):
    """Drop low-confidence predictions and apply per-class NMS.

    Args:
        org_box: Raw detector output whose last axis is laid out as
            ``[cx, cy, w, h, confidence, class scores...]``. Leading axes
            are flattened into a single list of candidates.
        conf_thres: Candidates with confidence not above this are discarded.
        iou_thres: IoU threshold handed to ``pynms``.

    Returns:
        Array of shape ``(n, 6)`` with rows
        ``[x1, y1, x2, y2, confidence, class index]``, grouped by ascending
        class index. When nothing passes the threshold the result has shape
        ``(0, 6)``.
    """
    preds = np.asarray(org_box)
    # Flatten to (candidates, fields). Unlike np.squeeze this stays 2-D even
    # when there is a single candidate.
    preds = preds.reshape(-1, preds.shape[-1])
    box = preds[preds[:, 4] > conf_thres]  # boolean indexing returns a copy
    if box.shape[0] == 0:
        return np.empty((0, 6), dtype=box.dtype)

    cls = np.argmax(box[:, 5:], axis=1)  # best class per remaining candidate
    output = []
    for curr_cls in np.unique(cls):
        curr_cls_box = box[cls == curr_cls, :6]
        curr_cls_box[:, 5] = curr_cls  # column 5 now carries the class index
        curr_cls_box = xywh2xyxy(curr_cls_box)
        # pynms returns the indices of the boxes to keep for this class.
        for k in pynms(curr_cls_box, iou_thres):
            output.append(curr_cls_box[k])
    return np.array(output)


def draw(box_data, x, y, input_size=320):
    """Rescale boxes from network-input pixels to original-image pixels.

    Despite its name this function does not draw anything; it only converts
    coordinates.

    Args:
        box_data: Array whose first four columns are ``x1, y1, x2, y2`` in
            the coordinate system of the resized network input. An empty
            array yields an empty list.
        x: Width of the original image in pixels.
        y: Height of the original image in pixels.
        input_size: Side length of the square network input the boxes refer
            to. Defaults to 320.

    Returns:
        List of ``[x1, y1, x2, y2]`` lists of Python ints in original-image
        pixels.
    """
    # Truncate to whole pixels first, then scale each axis independently.
    boxes = np.asarray(box_data)[..., :4].astype(np.int32)
    return [
        [
            int(x1 / input_size * x),
            int(y1 / input_size * y),
            int(x2 / input_size * x),
            int(y2 / input_size * y),
        ]
        for x1, y1, x2, y2 in boxes
    ]


# Shared detector instance used by get_boxx; created once when the script starts.
model = YOLOV5()


def get_boxx(image):
    """Detect boxes in encoded image bytes.

    Runs the shared detector, filters its raw output, and returns the
    surviving boxes as ``[x1, y1, x2, y2]`` lists in original-image pixels.
    """
    raw_pred, width, height = model.inference(image)
    return draw(filter_box(raw_pred), width, height)


import io

import requests

url = "https:///captcha_v3/batch/v3/51435/2023-10-30T13/word/.jpg?challenge="

# Fail fast on network problems instead of hanging or feeding an error page
# to the image decoder.
response = requests.get(url, timeout=10)
response.raise_for_status()
images = response.content

poses = get_boxx(images)

# Split the detections into an upper and a lower half by their y1 coordinate,
# then order each half from left to right.
y = sorted(poses, key=lambda box: box[1])
half = len(y) // 2
up_images = sorted(y[:half], key=lambda box: box[0])
down_images = sorted(y[half:], key=lambda box: box[0])

# Decode straight from memory: no temporary file to write, re-read and delete.
img = Image.open(io.BytesIO(images))

# Crop every upper box once instead of once per lower box.
up_crops = [img.crop(tuple(box)).convert('RGB') for box in up_images]

# For each lower crop keep the best score against any upper crop.
images_dict = dict()
for index, each in enumerate(down_images):
    p1 = img.crop(tuple(each)).convert('RGB')
    scores = [getdata(p1, p2) for p2 in up_crops]
    if scores:
        images_dict[index] = max(scores)
print(images_dict)