首先创建一个detect_with_API.py来替代原来的detect.py
import torch
from numpy import random
from models.experimental import attempt_load
from utils.datasets import MyLoadImages
from utils.general import check_img_size, non_max_suppression, apply_classifier, \
scale_coords, set_logging
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier
class simulation_opt:
    """Minimal stand-in for the argparse options namespace used by detect.py.

    Only the fields the inference path actually reads are provided; `source`
    is always None because images are handed straight to detectapi.detect().
    """

    def __init__(self, weights='models/yolov7.pt',
                 img_size=640, conf_thres=0.25,
                 iou_thres=0.45, device='', view_img=False,
                 classes=None, agnostic_nms=False,
                 augment=False, update=False, exist_ok=False):
        # Populate all option attributes in one shot.
        self.__dict__.update(
            weights=weights,
            source=None,
            img_size=img_size,
            conf_thres=conf_thres,
            iou_thres=iou_thres,
            device=device,
            view_img=view_img,
            classes=classes,
            agnostic_nms=agnostic_nms,
            augment=augment,
            update=update,
            exist_ok=exist_ok,
        )
class detectapi:
    """Reusable YOLOv7 inference API that accepts cv2 (BGR ndarray) images.

    Replaces the script-style detect.py: the model is loaded once in
    __init__, and detect() can then be called repeatedly (e.g. per video
    frame) without reloading weights.
    """

    def __init__(self, weights, img_size=640):
        """Load the model and prepare inference state.

        :param weights: path to the .pt weights file
        :param img_size: network input size (checked against model stride)
        """
        # Fake argparse namespace so the original utility functions work unchanged.
        self.opt = simulation_opt(weights=weights, img_size=img_size)
        weights, imgsz = self.opt.weights, self.opt.img_size

        # Initialize device / precision.
        set_logging()
        self.device = select_device(self.opt.device)
        self.half = self.device.type != 'cpu'  # half precision only supported on CUDA

        # Load model.
        self.model = attempt_load(weights, map_location=self.device)  # load FP32 model
        self.stride = int(self.model.stride.max())  # model stride
        self.imgsz = check_img_size(imgsz, s=self.stride)  # check img_size
        if self.half:
            self.model.half()  # to FP16

        # Optional second-stage classifier (disabled by default).
        self.classify = False
        if self.classify:
            self.modelc = load_classifier(name='resnet101', n=2)  # initialize
            # Fix: load_state_dict() returns an _IncompatibleKeys namedtuple,
            # so it must not be chained with .to()/.eval() as the original did.
            self.modelc.load_state_dict(
                torch.load('weights/resnet101.pt', map_location=self.device)['model'])
            self.modelc.to(self.device).eval()

        # Class names and one random BGR colour per class for box drawing.
        self.names = self.model.module.names if hasattr(self.model, 'module') else self.model.names
        self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names]

    def detect(self, source):
        """Run detection on a list of cv2 images.

        :param source: list of BGR np.ndarray images
        :return: (result, names) where result is a list of
                 (annotated_image, [(class_id, [x1, y1, x2, y2], confidence), ...])
                 and names maps class_id -> class name
        :raises TypeError: if source is not a list of cv2 images
        """
        if type(source) != list:
            raise TypeError('source must be a list which contain pictures read by cv2')
        dataset = MyLoadImages(source, img_size=self.imgsz, stride=self.stride)

        # Warm-up pass so the first real frame is not slowed by CUDA init.
        if self.device.type != 'cpu':
            self.model(torch.zeros(1, 3, self.imgsz, self.imgsz).to(self.device).type_as(
                next(self.model.parameters())))  # run once

        result = []
        for img, im0s in dataset:
            # Fix: the original body referenced an undefined name `im0`
            # (NameError on every call); bind it to the original-resolution image.
            im0 = im0s
            img = torch.from_numpy(img).to(self.device)
            img = img.half() if self.half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference + NMS.
            pred = self.model(img, augment=self.opt.augment)[0]
            pred = non_max_suppression(pred, self.opt.conf_thres, self.opt.iou_thres,
                                       classes=self.opt.classes,
                                       agnostic=self.opt.agnostic_nms)

            # Apply second-stage classifier if enabled.
            if self.classify:
                pred = apply_classifier(pred, self.modelc, img, im0s)

            # One image per iteration, so there is exactly one prediction tensor.
            det = pred[0]
            result_txt = []
            if len(det):
                # Rescale boxes from the letterboxed size back to the original image.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    line = (int(cls.item()), [int(_.item()) for _ in xyxy], conf.item())
                    result_txt.append(line)
                    label = f'{self.names[int(cls)]} {conf:.2f}'
                    plot_one_box(xyxy, im0, label=label,
                                 color=self.colors[int(cls)], line_thickness=3)
            result.append((im0, result_txt))
        return result, self.names
修改 根目录下 utils/datasets.py 文件,在 logger = logging.getLogger(__name__) 后一行加入以下代码
class MyLoadImages:  # for inference
    """Dataset over in-memory cv2 images (BGR ndarrays) for YOLO inference.

    Unlike LoadImages this takes a list of already-decoded images instead of
    file paths, so it never touches the filesystem and does not handle video.
    """

    def __init__(self, path, img_size=640, stride=32):
        """Validate the image list and record sizing parameters.

        :param path: list of BGR np.ndarray images (as returned by cv2.imread)
        :param img_size: target network input size for letterboxing
        :param stride: model stride used to round the letterbox padding
        :raises TypeError: if any element is not an H x W x C ndarray
        """
        # Validate early so a bad element fails here, not mid-iteration.
        for img in path:
            if not isinstance(img, np.ndarray) or len(img.shape) != 3:
                raise TypeError('there is a object which is not a picture read by cv2 in source')
        self.img_size = img_size
        self.stride = stride
        self.files = path   # the images themselves, kept under the original attribute name
        self.nf = len(path)  # number of "files" (images)
        self.mode = 'image'  # only still images are supported

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        """Return (network_input, original_image) for the next image."""
        if self.count == self.nf:
            raise StopIteration
        img0 = self.files[self.count]  # original BGR image
        self.count += 1
        # Padded resize to the network input size.
        img = letterbox(img0, self.img_size, stride=self.stride)[0]
        # Convert BGR HWC -> RGB CHW, contiguous for torch.from_numpy.
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)
        return img, img0
接下来使用opencv并调用刚才的API来实现车道线检测
import cv2
import numpy as np
import detect_with_API
import torch
def get_edge_img(color_img, gaussian_ksize=5, gaussian_sigmax=1,
                 canny_threshold1=50, canny_threshold2=100):
    """Blur the input and run Canny edge detection.

    :param color_img: colour image, channels=3
    :param gaussian_ksize: Gaussian kernel side length (odd)
    :param gaussian_sigmax: Gaussian sigma along x
    :param canny_threshold1: lower hysteresis threshold for Canny
    :param canny_threshold2: upper hysteresis threshold for Canny
    :return: single-channel edge map
    """
    # Smooth first so Canny does not latch onto pixel noise.
    blurred = cv2.GaussianBlur(color_img, (gaussian_ksize, gaussian_ksize),
                               gaussian_sigmax)
    return cv2.Canny(blurred, canny_threshold1, canny_threshold2)
def roi_mask(gray_img, vertices=None):
    """Mask gray_img so only the region of interest remains.

    :param gray_img: grayscale image, channels=1
    :param vertices: polygon vertices for the ROI, shaped like
                     np.array([[[x, y], ...]]); defaults to a trapezoid
                     tuned for 640x368 road frames (the original behaviour)
    :return: image with everything outside the ROI zeroed
    """
    if vertices is None:
        # Default trapezoid covering the lane area of a 640x368 frame.
        vertices = np.array([[[0, 368], [300, 210], [340, 210], [640, 368]]])
    mask = np.zeros_like(gray_img)
    mask = cv2.fillPoly(mask, pts=vertices, color=255)
    return cv2.bitwise_and(gray_img, mask)
def get_lines(edge_img):
    """Extract the left and right lane lines from an edge image.

    :param edge_img: single-channel edge map (e.g. Canny output)
    :return: (left_line, right_line), each np.array([[xmin, ymin], [xmax, ymax]])
    """
    def calculate_slope(line):
        """Slope of one Hough segment np.array([[x_1, y_1, x_2, y_2]]).

        NOTE(review): a perfectly vertical segment divides by zero (inf with
        numpy int operands plus a RuntimeWarning) — confirm HoughLinesP never
        emits one for this ROI, or clamp before dividing.
        """
        x_1, y_1, x_2, y_2 = line[0]
        return (y_2 - y_1) / (x_2 - x_1)

    def reject_abnormal_lines(lines, threshold=0.2):
        """Iteratively drop the segment whose slope deviates most from the mean.

        :param lines: list of np.array([[x_1, y_1, x_2, y_2]]) segments
        """
        slopes = [calculate_slope(line) for line in lines]
        while len(lines) > 0:
            mean = np.mean(slopes)
            diff = [abs(s - mean) for s in slopes]
            idx = np.argmax(diff)
            if diff[idx] > threshold:
                slopes.pop(idx)
                lines.pop(idx)
            else:
                break
        return lines

    def least_squares_fit(lines):
        """Fit one straight segment through all endpoints of lines.

        :param lines: list of np.array([[x_1, y_1, x_2, y_2]]) segments
        :return: np.array([[xmin, ymin], [xmax, ymax]])
        """
        x_coords = np.ravel([[line[0][0], line[0][2]] for line in lines])
        y_coords = np.ravel([[line[0][1], line[0][3]] for line in lines])
        poly = np.polyfit(x_coords, y_coords, deg=1)
        point_min = (np.min(x_coords), np.polyval(poly, np.min(x_coords)))
        point_max = (np.max(x_coords), np.polyval(poly, np.max(x_coords)))
        # Fix: np.int was removed in NumPy 1.24; the builtin int dtype is equivalent.
        return np.array([point_min, point_max], dtype=int)

    # All candidate segments. NOTE(review): HoughLinesP returns None when no
    # segment is found, which makes the comprehensions below raise — verify
    # the caller guarantees edges exist, or add a guard.
    lines = cv2.HoughLinesP(edge_img, 1, np.pi / 180, 15, minLineLength=40,
                            maxLineGap=20)
    # Split into left/right lane candidates by slope sign.
    left_lines = [line for line in lines if calculate_slope(line) > 0]
    right_lines = [line for line in lines if calculate_slope(line) < 0]
    # Drop outlier segments, then fit each side to one line.
    left_lines = reject_abnormal_lines(left_lines)
    right_lines = reject_abnormal_lines(right_lines)
    return least_squares_fit(left_lines), least_squares_fit(right_lines)
def draw_lines(img, lines):
    """Draw the two fitted lane lines onto img in place.

    :param img: image to draw on (modified in place)
    :param lines: pair of segments, each np.array([[xmin, ymin], [xmax, ymax]])
    :return: None
    """
    # Draw the left segment then the right, both in yellow.
    for segment in lines:
        cv2.line(img, tuple(segment[0]), tuple(segment[1]),
                 color=(0, 255, 255), thickness=5)
def show_lane(color_img):
    """Detect lane lines in color_img and draw them onto it.

    :param color_img: colour image, channels=3 (modified in place)
    :return: the same image with the lane lines drawn
    """
    edges = get_edge_img(color_img)
    masked_edges = roi_mask(edges)
    draw_lines(color_img, get_lines(masked_edges))
    return color_img
if __name__ == '__main__':
    # Open the input video and load the detector once up front.
    cap = cv2.VideoCapture('video.mp4')
    a = detect_with_API.detectapi(weights='yolov7.pt')
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    outfile = cv2.VideoWriter('output.avi', fourcc, 25., (1280, 368))
    # Process frame by frame.
    with torch.no_grad():
        while True:
            rec, img = cap.read()
            # Fix: the original never checked `rec`, so the end of the video
            # (or a read failure) crashed on a None frame.
            if not rec:
                break
            origin = np.copy(img)
            img = show_lane(img)
            result, names = a.detect([img])
            img = result[0][0]
            for cls, (x1, y1, x2, y2), conf in result[0][1]:
                # class name, top-left x/y, bottom-right x/y, confidence
                print(names[cls], x1, y1, x2, y2, conf)
            print()  # blank line separates per-frame output
            # Side-by-side view: original frame | annotated frame.
            output = np.concatenate((origin, img), axis=1)
            #outfile.write(output)
            cv2.imshow('video', img)
            # Quit on 'q'.
            if cv2.waitKey(1) == ord('q'):
                break
    # Fix: release capture/writer handles instead of leaking them.
    cap.release()
    outfile.release()
    cv2.destroyAllWindows()
效果如下
以上为全部内容!