基于上一篇文章,对整体进行了升级。
升级内容:不再把图片流保存为图片,而是在提问时将图片转为视频,通过MiniCPM的视频解析功能对内容进行解析
前端唯一改动【将上传间隔由1000毫秒改为20或10毫秒,即提高上传频率】,目的是为了给后台生成的视频提供更多帧数,从而生成正常可解析视频。
后端改动:
1.接收方法只做缓存,不再保存图片
def videostream():
    """Receive one uploaded frame and cache it in the in-memory queue.

    Expects a multipart form upload under the field name ``'video'``
    containing a single encoded image (one frame of the stream). The frame
    is decoded entirely in memory with OpenCV and appended to the
    module-level ``queue`` buffer — nothing is written to disk.

    Returns:
        ``(json, 200)`` on success; ``(str, 400)`` when the upload is
        missing, empty, or not decodable as an image.
    """
    if 'video' not in request.files:
        return 'No file part', 400
    video_file = request.files['video']
    if video_file.filename == '':
        return 'No selected file', 400
    # Decode the uploaded bytes directly in memory — no file is saved.
    nparr = np.frombuffer(video_file.read(), np.uint8)
    frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if frame is None:
        # Corrupt/non-image payload: imdecode returns None. Reject it here
        # instead of caching None, which would crash the video-assembly
        # step in ask_videostream later.
        return 'Invalid image data', 400
    queue.append(frame)
    # `queuec` is the key identifying this frame buffer — defined at module
    # level elsewhere in the file (not visible in this chunk).
    return jsonify({'message': {"key":queuec , "value" : "解析完成,可继续提问"}, 'success': True}), 200
2.解析部分的修改,里面的路径自己修改呦
@app.route('/askvideostream', methods=['POST'])
def ask_videostream():
    """Assemble cached frames into an MP4, sample it, and query the model.

    Form fields:
        key: identifier of the cached frame buffer (read but currently
             unused — kept for interface compatibility with the frontend).
        q:   the user's question about the video content.

    Returns:
        ``(json, 200)`` with the model's answer, or ``(json, 400)`` when
        no frames have been cached yet.
    """
    imgkey = request.form.get("key")  # reserved: buffer key from frontend
    question = request.form.get("q")

    frames = list(queue)
    if not frames:
        # Guard: queue may be empty if /askvideostream is hit before any
        # frame upload — avoid IndexError on frames[0].
        return jsonify({'message': '没有可解析的帧,请先上传视频流', 'success': False}), 400

    height, width, layers = frames[0].shape
    video_path = os.path.join(app.config['UPLOAD_video_FOLDER'], 'look.mp4')

    # Write every cached frame into a temporary MP4 for the video parser.
    fps = 30
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # MP4 codec
    writer = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
    try:
        for frame in frames:
            writer.write(frame)
    finally:
        # Always release so the file is flushed even if a write fails.
        writer.release()

    def uniform_sample(seq, n):
        # Pick n indices spread evenly across seq (midpoint of each segment).
        gap = len(seq) / n
        idxs = [int(i * gap + gap / 2) for i in range(n)]
        return [seq[i] for i in idxs]

    vr = VideoReader(video_path)
    # Sample roughly one frame per second; max(1, ...) prevents a zero step
    # (range would raise) when the average FPS rounds down to 0.
    sample_fps = max(1, round(vr.get_avg_fps()))
    frame_idx = list(range(0, len(vr), sample_fps))
    if len(frame_idx) > MAX_NUM_FRAMES:
        frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
    video = vr.get_batch(frame_idx).asnumpy()
    video = [Image.fromarray(v.astype('uint8')) for v in video]

    # MiniCPM video chat: the sampled frames plus the question form one turn.
    msgs = [{'role': 'user', 'content': video + [question]}]
    default_params = {"use_image_id": False, "max_slice_nums": 1}
    res = model.chat(
        image=None,
        msgs=msgs,
        tokenizer=tokenizer,
        **default_params
    )
    return jsonify({'message': res, 'success': True}), 200