从创建环境到各种模块安装以及报错解决。
cv2.error: Unknown C++ exception from OpenCV code
报错就不一一列举出来了。看截图。
首先我用中文向GPT写了我的需求
然后GPT给出代码:
import cv2
import numpy as np
import pyttsx3
import speech_recognition as sr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
# --- Initialize the YOLOv3 object detector ---
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
# OpenCV >= 4.x returns a flat array of ints from getUnconnectedOutLayers(),
# while 3.x returns an Nx1 array of [int]. Flattening handles both versions
# and avoids the classic "invalid index to scalar variable" crash that the
# original `layer_names[i[0] - 1]` triggers on modern OpenCV.
output_layers = [layer_names[i - 1]
                 for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

# --- Initialize speech-to-text ---
r = sr.Recognizer()
mic = sr.Microphone()

# --- Initialize the GPT-2 language model ---
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

# --- Initialize text-to-speech ---
engine = pyttsx3.init()

# --- Start video capture from the default camera ---
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    # Guard against a failed grab: feeding an empty frame into cv2.dnn is
    # what raises "Unknown C++ exception from OpenCV code".
    if not ret or frame is None:
        break

    # Run YOLO on the current frame.
    height, width, channels = frame.shape
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Detections are (center_x, center_y, w, h) normalized to [0, 1];
                # convert to pixel-space top-left corner plus size.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-maximum suppression. The return shape of NMSBoxes also varies by
    # OpenCV version, so normalize it to a flat list of kept indices
    # (NMSBoxes returns an empty tuple when nothing survives).
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    kept = np.asarray(indexes).flatten() if len(indexes) > 0 else []

    # For each kept detection: show the ROI, listen to the user,
    # then ask GPT-2 and speak the answer.
    for i in kept:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        # Clamp the crop to the frame; negative x/y would otherwise
        # produce an empty slice that crashes cv2.imshow.
        roi = frame[max(y, 0):y + h, max(x, 0):x + w]
        if roi.size:
            cv2.imshow("ROI", roi)
            cv2.waitKey(1)

        with mic as source:
            print("Listening...")
            audio = r.listen(source)
        try:
            print("Recognizing...")
            text = r.recognize_google(audio)
            input_text = label + " is " + text
            input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
            output_ids = model.generate(input_ids, max_length=1000, do_sample=True)
            output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
            print("GPT2 output: " + output_text)
            engine.say(output_text)
            engine.runAndWait()
        # Only swallow the expected speech-recognition failures instead of a
        # bare `except: pass`, which would also hide model/tokenizer bugs.
        except sr.UnknownValueError:
            print("Could not understand audio")
        except sr.RequestError as e:
            print("Speech recognition request failed: " + str(e))

    # Display the full frame.
    cv2.imshow("Frame", frame)
    # Exit on 'q' key press.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources.
cap.release()
cv2.destroyAllWindows()
这是一段Python程序,主要实现了通过YOLOv3检测视频中的物体,并使用语音识别技术将用户的语音转换成文本。然后,将物体名称与文本一起输入到GPT2模型中,生成与该物体相关的自然语言信息,并使用语音合成系统将其转换为语音。这个程序还包括了一些常见的计算机视觉和自然语言处理库,如OpenCV,PyTorch,Transformers,SpeechRecognition和pyttsx3等。通过这个程序,我们可以在实时视频中获取物体的详细描述。
开始教程
五一快乐
配置环境把3.8改为3.7直接cmd
用以下命令安装 ultralytics
安装好了,进行代码实测报错
查资料
我的版本是4.7.0.72
按照教程安装 4.1.2.30 版本
安装到了卸载原模块的时候报错没有权限
安装模块时在install 后加上 --user
继续报错不能安装
找到当前环境模块路径找到带~符号开头的文件直接删除
再次安装 成功
运行代码
成功