51c视觉~CV~合集3

最新推荐文章于 2025-02-03 13:32:46 发布

大海依然在

最新推荐文章于 2025-02-03 13:32:46 发布

阅读量78

点赞数

一、 CV确定对象的方向

介绍如何使用OpenCV确定对象的方向(即旋转角度，以度为单位)。

51c视觉~CV~合集3_视觉AI

先决条件：安装 Python 3.7 或者更高版本。可以参考下文链接：https://automaticaddison.com/how-to-set-up-anaconda-for-windows-10/ ，或者自行下载安装：https://www.python.org/getit/ 。

相关包安装与设置：在我们开始之前，让我们确保已经安装了所有的软件包。检查您的机器上是否安装了 OpenCV。如果你使用 Anaconda，可以输入：conda install -c conda-forge opencv ；或者使用 pip 安装，指令：pip install opencv-python 。安装科学计算库 NumPy：pip install numpy 。

准备测试图像：找一张图片。我的输入图像宽度为 1200 像素，高度为 900 像素。我的输入图像的文件名是 input_img.jpg。

编写代码：这是代码。它接受一个名为 input_img.jpg 的图像，并输出一个名为 output_img.jpg 的带注释的图像。部分代码来自官方 OpenCV 实现：https://docs.opencv.org/4.x/d1/dee/tutorial_introduction_to_pca.html

import cv2 as cv
from math import atan2, cos, sin, sqrt, pi
import numpy as np
 
def drawAxis(img, p_, q_, color, scale):
  """Draw an arrow on ``img`` from ``p_`` in the direction of ``q_``.

  The shaft keeps the direction of the segment ``p_`` -> ``q_`` while its
  length is multiplied by ``scale``. Two 9-pixel hooks are drawn at the tip.
  ``p_`` and ``q_`` are copied, so the caller's points are left untouched.
  """
  start = list(p_)
  tip = list(q_)

  ## [visualization1]
  dy = start[1] - tip[1]
  dx = start[0] - tip[0]
  angle = atan2(dy, dx) # angle in radians
  length = sqrt(dy * dy + dx * dx)

  # Move the tip so that the shaft is `scale` times the original segment
  tip[0] = start[0] - scale * length * cos(angle)
  tip[1] = start[1] - scale * length * sin(angle)
  tip_px = (int(tip[0]), int(tip[1]))
  cv.line(img, (int(start[0]), int(start[1])), tip_px, color, 3, cv.LINE_AA)

  # Arrow hooks: one stroke on each side of the shaft, both ending at the tip
  for offset in (pi / 4, -pi / 4):
    hook_x = tip[0] + 9 * cos(angle + offset)
    hook_y = tip[1] + 9 * sin(angle + offset)
    cv.line(img, (int(hook_x), int(hook_y)), tip_px, color, 3, cv.LINE_AA)
  ## [visualization1]
 
def getOrientation(pts, img):
  """Annotate ``img`` with the principal axes of a contour and return its angle.

  ``pts`` is indexed as ``pts[i, 0, 0]`` (x) and ``pts[i, 0, 1]`` (y). The
  function draws the PCA centre, both principal axes and a text label onto
  ``img`` in place, and returns the direction of the first eigenvector in
  radians (``atan2`` of its y and x components).
  """
  ## [pca]
  # Copy the contour coordinates into the N x 2 float64 buffer used by PCA
  data_pts = np.empty((len(pts), 2), dtype=np.float64)
  data_pts[:, 0] = pts[:, 0, 0]
  data_pts[:, 1] = pts[:, 0, 1]

  # Run PCA; an empty mean array is passed in, as in the OpenCV tutorial
  mean, eigenvectors, eigenvalues = cv.PCACompute2(data_pts, np.empty((0)))

  # The PCA mean is the centre of the object
  cntr = (int(mean[0,0]), int(mean[0,1]))
  cx, cy = cntr
  ## [pca]

  ## [visualization]
  # Draw the centre and the two principal components
  cv.circle(img, cntr, 3, (255, 0, 255), 2)
  p1 = (
    cx + 0.02 * eigenvectors[0,0] * eigenvalues[0,0],
    cy + 0.02 * eigenvectors[0,1] * eigenvalues[0,0],
  )
  p2 = (
    cx - 0.02 * eigenvectors[1,0] * eigenvalues[1,0],
    cy - 0.02 * eigenvectors[1,1] * eigenvalues[1,0],
  )
  drawAxis(img, cntr, p1, (255, 255, 0), 1)
  drawAxis(img, cntr, p2, (0, 0, 255), 5)

  angle = atan2(eigenvectors[0,1], eigenvectors[0,0]) # orientation in radians
  ## [visualization]

  # White box with the rotation angle (in degrees) written on top of it
  label = f"  Rotation Angle: {-int(np.rad2deg(angle)) - 90} degrees"
  cv.rectangle(img, (cx, cy - 25), (cx + 250, cy + 10), (255,255,255), -1)
  cv.putText(img, label, (cx, cy), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1, cv.LINE_AA)

  return angle
 
# Load the image
img = cv.imread("input_img.jpg")

# cv.imread returns None when the file cannot be read
if img is None:
  # SystemExit with a message prints it to stderr and exits with status 1,
  # so a failed load is no longer reported as a success (exit code 0).
  raise SystemExit("Error: File not found")

cv.imshow('Input Image', img)

# Convert image to grayscale
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Convert image to binary
_, bw = cv.threshold(gray, 50, 255, cv.THRESH_BINARY | cv.THRESH_OTSU)

# Find all the contours in the thresholded image
contours, _ = cv.findContours(bw, cv.RETR_LIST, cv.CHAIN_APPROX_NONE)

for i, c in enumerate(contours):

  # Calculate the area of each contour
  area = cv.contourArea(c)

  # Ignore contours that are too small or too large
  if area < 3700 or 100000 < area:
    continue

  # Draw each contour only for visualisation purposes
  cv.drawContours(img, contours, i, (0, 0, 255), 2)

  # Find the orientation of each shape (also annotates img in place)
  getOrientation(c, img)

cv.imshow('Output Image', img)
cv.waitKey(0)
cv.destroyAllWindows()

# Save the output image to the current directory
cv.imwrite("output_img.jpg", img)

1.
2.
3.
4.
5.
6.
7.
8.
9.
10.
11.
12.
13.
14.
15.
16.
17.
18.
19.
20.
21.
22.
23.
24.
25.
26.
27.
28.
29.
30.
31.
32.
33.
34.
35.
36.
37.
38.
39.
40.
41.
42.
43.
44.
45.
46.
47.
48.
49.
50.
51.
52.
53.
54.
55.
56.
57.
58.
59.
60.
61.
62.
63.
64.
65.
66.
67.
68.
69.
70.
71.
72.
73.
74.
75.
76.
77.
78.
79.
80.
81.
82.
83.
84.
85.
86.
87.
88.
89.
90.
91.
92.
93.
94.
95.
96.
97.
98.
99.
100.
101.
102.

运行结果

使用PCA(主成分分析)方法获取物体的主方向，效果如下：

了解旋转轴

每个对象的正 x 轴是红线。每个对象的正 y 轴是蓝线。全局正x 轴从左到右水平穿过图像。全局正z 轴指向此页外。全局正y 轴从图像底部垂直指向图像顶部。使用右手定则测量旋转，将四根手指伸直（食指到小指）沿全局正 x 轴方向伸出。

51c视觉~CV~合集3_视觉AI_02

然后将四根手指逆时针旋转 90 度。您的指尖指向正 y 轴，而您的拇指指向页面外指向正 z 轴。

51c视觉~CV~合集3_视觉AI_03

计算0~180°之间的方向

如果我们要计算对象的方向并确保结果始终在 0 到 180 度之间，我们可以使用以下代码：


# This program calculates the orientation of an object.
# The input is an image, and the output is an annotated image
# with the angle of orientation for each object (0 to 180 degrees)
 
import cv2 as cv
from math import atan2, cos, sin, sqrt, pi
import numpy as np
 
# Load the image
img = cv.imread("input_img.jpg")

# cv.imread returns None when the file cannot be read
if img is None:
  # SystemExit with a message prints it to stderr and exits with status 1,
  # so a failed load is no longer reported as a success (exit code 0).
  raise SystemExit("Error: File not found")

cv.imshow('Input Image', img)

# Convert image to grayscale
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Convert image to binary
_, bw = cv.threshold(gray, 50, 255, cv.THRESH_BINARY | cv.THRESH_OTSU)

# Find all the contours in the thresholded image
contours, _ = cv.findContours(bw, cv.RETR_LIST, cv.CHAIN_APPROX_NONE)

for i, c in enumerate(contours):

  # Calculate the area of each contour
  area = cv.contourArea(c)

  # Ignore contours that are too small or too large
  if area < 3700 or 100000 < area:
    continue

  # cv.minAreaRect returns:
  # (center(x, y), (width, height), angle of rotation) = cv2.minAreaRect(c)
  rect = cv.minAreaRect(c)
  box = cv.boxPoints(rect)
  # np.int0 was an alias of np.intp and was removed in NumPy 2.0;
  # casting to np.intp directly gives the same integer corner coordinates.
  box = box.astype(np.intp)

  # Retrieve the key parameters of the rotated bounding box
  center = (int(rect[0][0]), int(rect[0][1]))
  width = int(rect[1][0])
  height = int(rect[1][1])
  angle = int(rect[2])

  # Re-express the rectangle angle depending on which side is the longer one
  if width < height:
    angle = 90 - angle
  else:
    angle = -angle

  # White box with the rotation angle written on top, then the rotated rectangle
  label = "  Rotation Angle: " + str(angle) + " degrees"
  cv.rectangle(img, (center[0]-35, center[1]-25),
    (center[0] + 295, center[1] + 10), (255,255,255), -1)
  cv.putText(img, label, (center[0]-50, center[1]),
    cv.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,0), 1, cv.LINE_AA)
  cv.drawContours(img,[box],0,(0,0,255),2)

cv.imshow('Output Image', img)
cv.waitKey(0)
cv.destroyAllWindows()

# Save the output image to the current directory
cv.imwrite("min_area_rec_output.jpg", img)

最终输出结果：

参考链接：https://automaticaddison.com/how-to-determine-the-orientation-of-an-object-using-opencv/

二、汽车型号图片搜索

该项目将分两个阶段执行。首先，我们将收集数据并将其转换为矢量。在第二阶段，我们将使用该数据以及输入图像，使用Streamlit框架显示类似的图像。

配置环境

我们将使用ImageBind，这是 Meta 开发的开源库，用于将图像转换为矢量。

代码首先安装 ImageBind 库，该库需要从其 GitHub 存储库克隆才能正确集成和使用。

git clone https://github.com/facebookresearch/ImageBind.git 
cd ImageBind 
pip install -e .

此外，我们还需要一些其他库，包括ultralytics和qdrant-client，以确保项目正确高效地运行。

数据采集

我们收集了一系列代表各种类型汽车及其各自价格的图片。

请从这里访问数据集：

随后，将它们存储在列表中：一个用于存储名称，另一个用于存储各自的价格。

# File-name stems of the dataset images (read later as "cars_imgs/<stem>.jpg").
cars_img_list = [
    "img01", "img02", "img03", "img04", "img05",
    "img06", "img07", "img08", "img09", "img10",
    "img11", "img12", "img13", "img14", "img15",
]
# Price of each car as a string, index-aligned with cars_img_list.
# The last two entries originally used typographic quotes and a full-width
# comma, which is a SyntaxError in Python.
cars_cost_list = [
    "6.49", "3.99", "6.66", "6.65", "7.04",
    "5.65", "61.85", "11.00", "11.63", "11.56",
    "11.86", "46.05", "75.90", "13.59", "13.99",
]

导入库

现在让我们导入将图像转换为嵌入所需的所有必要库。

from ultralytics import YOLO
import cv2
import os


import torch
from imagebind import data
from imagebind.models import imagebind_model
from imagebind.models.imagebind_model import ModalityType

图像分割

我们将使用YOLOv8算法裁剪掉汽车，从而从图像中去除不必要的噪音。

为了实现这一点，我们首先借助YOLOv8在汽车周围绘制一个边界框，然后使用OpenCV剪切出该区域，最后将图像保存在目录中。

model = YOLO('yolov8n.pt')

# cv2.imwrite does not create the output directory, so make sure it exists.
os.makedirs("cropped_imgs", exist_ok=True)

for im in cars_img_list:
    src_path = "cars_imgs/" + im + ".jpg"
    img = cv2.imread(src_path)
    # cv2.imread returns None for a missing or unreadable file; fail with a
    # clear message instead of an opaque error inside cv2.resize.
    if img is None:
        raise FileNotFoundError("Could not read image: " + src_path)
    img = cv2.resize(img, (320, 245))
    results = model(img, stream=True)

    for r in results:
        # Fall back to the whole frame when nothing is detected, so every
        # dataset entry still gets a file. The original reused undefined or
        # stale coordinates in that case.
        crop = img
        for box in r.boxes:
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
            # As in the original loop, the last reported box wins.
            # NOTE(review): detections are not filtered by class, so the box
            # is not guaranteed to be a car - confirm this is acceptable.
            if x2 > x1 and y2 > y1:
                crop = img[y1:y2, x1:x2]
        # No rectangle is drawn on img any more: its border pixels ended up
        # inside the saved crop, and img is never displayed.
        cv2.imwrite("cropped_imgs/" + im + "_cropped.jpg", crop)

图像转矢量

接下来，我们将使用ImageBind库将裁剪的图像转换为矢量嵌入，从而将其转换为数字格式。

注意：这是一个耗时的过程，因为它将从互联网上下载模型。

embedding_list = []

# Device used for both the model and the input tensors (was undefined before).
device = "cpu"

model_embed = imagebind_model.imagebind_huge(pretrained=True)
model_embed.eval()
model_embed.to(device)

# Iterate over the dataset stems so that embedding_list[i] belongs to
# cars_img_list[i]. The previous range(1, len(...)) loop built names such as
# "img1_cropped.jpg", which do not match the zero-padded files
# ("img01_cropped.jpg"), and it produced one embedding too few.
for im in cars_img_list:
    img_path = "cropped_imgs/" + im + "_cropped.jpg"
    vision_data = data.load_and_transform_vision_data([img_path], device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        image_embeddings = model_embed({ModalityType.VISION: vision_data})
    embedding_list.append(image_embeddings)

为了减少处理时间，您可以设置pretrained=False，尽管这会降低模型的准确性。

让我们保存嵌入以供稍后在代码中使用。

import pickle

# Persist the embeddings so the search stage can reuse them without
# recomputing them.
with open('embedded_data.pickle', mode='wb') as out_fh:
    pickle.dump(embedding_list, out_fh)

相似图片搜索

进入代码的第二阶段，我们将继续以图像作为输入并识别最相似的图像。随后，我们将显示这些图像及其各自的价格。

导入库

除了先前导入的库之外，还需要以下附加库：

import streamlit as st
from PIL import Image
import base64
import os
from io import BytesIO
import numpy as np

现在我们将通过启动ImageBind模型来启动代码，该模型会将上传的输入图像转换为矢量嵌入。

# Load the ImageBind model used to embed the uploaded image.
model_embed = imagebind_model.imagebind_huge(pretrained=True)
# Switch to evaluation mode (the original call used a full-width "（",
# which is a SyntaxError).
model_embed.eval()
model_embed.to("cpu")

让我们继续打开保存的文件“embedded_data.pickle”，其中包含我们的图像数据集的矢量数据。

存储矢量数据

我们将利用开源矢量数据库Qdrant来存储所有图像的嵌入并将其与输入图像进行比较。

import pickle

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

# Load the dataset embeddings written by the first stage.
# NOTE: pickle.load can execute arbitrary code - only load a file that this
# project produced itself.
with open('embedded_data.pickle', 'rb') as file:
    embedding_list = pickle.load(file)

# In-memory Qdrant instance: nothing is persisted between runs.
client = QdrantClient(":memory:")

# The keyword is "vectors_config"; the article text had it mangled into
# "vectors_cnotallow".
client.recreate_collection(
    collection_name='vector_comparison',
    vectors_config=VectorParams(size=1024, distance=Distance.COSINE)
)

# One point per dataset image; the point id is the index into embedding_list,
# so search hits can be mapped back to the image and price lists.
client.upsert(
    collection_name='vector_comparison',
    points=[
        PointStruct(id=i, vector=embedding['vision'][0].tolist())
        for i, embedding in enumerate(embedding_list)
    ]
)

比较图像

接下来，我们将把存储在Qdrant数据库中的每个向量嵌入与提供给程序的输入图像进行比较。

这将通过 3 个步骤完成：

从图像中裁剪汽车。
将裁剪的图像转换为矢量嵌入。
将该向量与其他图像的向量进行比较。

在这个过程中，我们采用余弦相似度来评估嵌入之间的相似度。

我们声明了一个函数，该函数以图像作为输入，并给出与输入图像最相似的 4 张图像的索引。

def image_to_similar_index(cv2Image):
    """Return the ids of the stored images most similar to ``cv2Image``.

    The image is resized, cropped to a YOLO detection, embedded with
    ``model_embed`` and compared against the 'vector_comparison' collection.

    Args:
        cv2Image: image array accepted by ``cv2.resize``.
            NOTE(review): the rest of the pipeline reads images with
            cv2.imread, so this is presumably expected in BGR order - confirm
            at the call site.

    Returns:
        A list of up to four point ids, best match first.
    """
    img = cv2.resize(cv2Image, (320, 245))
    model = YOLO('yolov8n.pt')

    # Default to the whole frame so a missed detection no longer raises
    # UnboundLocalError on cropped_img.
    cropped_img = img
    for r in model(img, stream=True):
        for box in r.boxes:
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
            # As in the original loop, the last reported box wins. No
            # rectangle is drawn: its border would end up inside the crop.
            if x2 > x1 and y2 > y1:
                cropped_img = img[y1:y2, x1:x2]
    cv2.imwrite("test_cropped.jpg", cropped_img)

    # "cpu" matches the device model_embed is moved to; the original passed
    # an undefined name `device`.
    vision_data = data.load_and_transform_vision_data(["test_cropped.jpg"], "cpu")
    with torch.no_grad():
        test_embeddings = model_embed({ModalityType.VISION: vision_data})

    # Query the collection directly. The original first upserted the query
    # vector under id 20 and then skipped hit 0, which polluted the collection
    # and raised IndexError when fewer than five points were returned.
    search_result = client.search(
        collection_name='vector_comparison',
        query_vector=test_embeddings['vision'][0].tolist(),
        limit=4,
    )
    return [hit.id for hit in search_result]

部署模型

我们现在将继续为我们的模型开发前端 Web 应用程序，以增强交互性和用户友好性。

为了实现这一点，我们将利用Streamlit以简单有效的方式为我们的 Python 应用程序创建 Web 界面。

我们将首先配置页面并将文件上传器小部件集成到网页上。

# Page-level setup: wide layout and the page title.
st.set_page_config(layout="wide")
st.title('Similar Cars Finder')

# CSS override for the padding of the main Streamlit container.
_page_css = """
        <style>
               .block-container {
                    padding-top: 3rem;
                    padding-bottom: 0rem;
                    padding-left: 5rem;
                    padding-right: 5rem;
                }
</style>
        """
st.markdown(_page_css, unsafe_allow_html=True)

# File uploader restricted to common image extensions.
_accepted_types = ["jpg", "jpeg", "png"]
uploaded_file = st.file_uploader("Upload an image of a car", type=_accepted_types)

现在我们将创建一个函数来显示带有适当填充和边距的图像以及价格。此函数将图像和价格表作为输入，并以格式化的方式将其显示在网页上。

def display_images_with_padding_and_price(images, prices, width, padding, gap):
    """Render images side by side, each with its price underneath.

    Args:
        images: base64-encoded JPEG strings, one per column.
        prices: price labels, paired with ``images`` by position.
        width: rendered image width in pixels.
        padding: padding around each image/price cell in pixels.
        gap: right margin after each image in pixels.

    The original body ignored ``width``, ``padding`` and ``gap`` (it
    interpolated the literals 0, 250 and 50) and put CSS inside the HTML
    ``width`` attribute; the values are now applied through ``style``.
    """
    # st.columns needs at least one column; nothing to draw for empty input.
    if not images:
        return

    cols = st.columns(len(images))
    for col, img, price in zip(cols, images, prices):
        # The HTML is built flush-left so that Markdown cannot mistake
        # indented markup for a code block.
        html = (
            f'<div style="padding: {padding}px; text-align: center;">'
            f'<img src="data:image/jpeg;base64,{img}" '
            f'style="width: {width}px; margin-right: {gap}px;">'
            f'<p style="font-size: 20px;">₹{price} Lakhs</p>'
            f'</div>'
        )
        col.markdown(html, unsafe_allow_html=True)

最后，我们将读取上传的图像作为输入，将其转换为NumPy数组，并将其作为输入提供给image_to_similar_index我们之前定义的函数，该函数将返回与输入最相似的图像的索引。

然后，我们将检索与返回的索引相对应的图像和价格，并将它们提供给函数display_images_with_padding_and_price，该函数将格式化图像并将其显示在网页上。

if uploaded_file is not None:
    # Force three channels: PNG uploads may carry an alpha channel or a palette.
    car_image = Image.open(uploaded_file).convert("RGB")
    # PIL yields RGB while the OpenCV side of the pipeline works in BGR, so
    # flip the channel order; ascontiguousarray because the flipped view has
    # negative strides.
    img_array = np.ascontiguousarray(np.array(car_image)[:, :, ::-1])
    # The keyword is "caption"; the article text had it mangled into
    # "captinotallow".
    st.image(car_image, caption='Uploaded Car Image', width=300)
    results = image_to_similar_index(img_array)

    # Directory holding the dataset images (this name was used but never defined).
    car_images_dir = "cars_imgs"
    if os.path.exists(car_images_dir):
        # os.listdir order is arbitrary; sorting makes position i match the
        # i-th dataset image (img01, img02, ...) that the search ids refer to.
        car_images = sorted(
            os.path.join(car_images_dir, name)
            for name in os.listdir(car_images_dir)
            if name.endswith(('jpg', 'jpeg', 'png'))
        )
    else:
        st.error(f"Directory {car_images_dir} does not exist")
        car_images = []

    if len(car_images) < 4:
        st.error("Not enough car images in the local storage")
    else:
        # Build images and prices together so they stay paired even when one
        # image is skipped.
        car_images_pil = []
        car_prices = []
        for idx in results:
            # Ignore ids that do not map to a dataset image and price.
            if not (0 <= idx < len(car_images) and idx < len(cars_cost_list)):
                continue
            img_path = car_images[idx]
            try:
                with Image.open(img_path) as img:
                    buffered = BytesIO()
                    # JPEG cannot store alpha or palette modes.
                    img.convert("RGB").save(buffered, format="JPEG")
            except (OSError, ValueError) as e:
                st.error(f"Error processing image {img_path}: {e}")
                continue
            car_images_pil.append(base64.b64encode(buffered.getvalue()).decode())
            car_prices.append(cars_cost_list[idx])

        if car_images_pil:
            st.subheader('Similar Cars with Prices')
            display_images_with_padding_and_price(car_images_pil, car_prices, width=200, padding=10, gap=20)