【UE5】离线实时语音转文字插件教程

最新推荐文章于 2025-01-23 15:57:00 发布

城北不再美

最新推荐文章于 2025-01-23 15:57:00 发布

阅读量2.9k

点赞数 5

分类专栏： UE5 文章标签： ue5 语音识别 虚幻 c++ 游戏引擎

本文链接：https://blog.csdn.net/qq_28660661/article/details/134461107

版权

UE5 专栏收录该内容

2 篇文章

订阅专栏

前言：该UE5.1项目实现了离线实时语音转文字并朗读输出结果的功能，能作为一个实现参考。

1.准备工作：

下载我打包好的插件、环境、模型：baiduyun，更多模型下载
安装UE5.1引擎，VS开发环境&编译器：Epic Launcher，Microsoft VS
新建空白C++项目后关闭引擎，并打开项目文件夹：
项目文件夹中放入下载的插件与语言模型：

1.项目目录/Plugins文件夹中放入下载解压好的VoskPlugin插件

2.项目目录/Vosk文件夹中放入下载解压好的语言服务器

3.项目目录/Vosk/install/Models文件夹中放入解压好的大、小中文模型

2.各项设置：

修改配置，项目支持语音： 项目目录/Config文件夹中修改DefaultEngine.ini，末尾添加配置项

; Turns on voice support for the project (required by this tutorial).
[Voice]
bEnabled=true
[SystemSettings]
; NOTE(review): presumably the input level below which captured audio is treated as silence - confirm against the engine's voice console-variable documentation.
voice.SilenceDetectionThreshold=0.01

打开项目并在菜单栏-工具-新建C++类KTTKComponent（继承自VoskComponent）到Vosk插件中：
KTTKComponent.h中声明服务器初始化、各项默认配置，以及开启/关闭识别的函数：

#pragma once

#include "CoreMinimal.h"
#include "Components/ActorComponent.h"

#include "VoskComponent.h"
#include "VoskServerParameters.h"
#include "ProcessHandleWrapper.h"
#include "Engine/World.h"
#include "TimerManager.h"
#include "Kismet/KismetSystemLibrary.h"

#include "KTTKComponent.generated.h"


/**
 * Vosk component that starts the external recognition server process in BeginPlay,
 * connects to it after a fixed delay, and exposes Blueprint-callable functions to
 * start and stop speech capture.
 */
UCLASS( ClassGroup=(Custom), meta=(BlueprintSpawnableComponent) )
class VOSKPLUGIN_API UKTTKComponent : public UVoskComponent
{
	GENERATED_BODY()

public:	
	// Enables ticking for this component.
	UKTTKComponent();

public:
	// Model path actually used; set in BeginPlay to BigModelPath or SmallModelPath depending on UseBigModel.
	FString ModelPath;
	// Receives the success flag written by BuildServerParameters in BeginPlay.
	bool BuildVoskSucess = false;
	// Command-line arguments returned by BuildServerParameters and passed to CreateProcessV.
	TArray<FString> CommandLineArgs;

	// When true the big language model is used, otherwise the small one.
	UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="使用AI语言大模型"))
	bool UseBigModel = false;

	// Path of the big language model; defaults to a folder below the project directory.
	UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="AI语言大模型路径"))
	FString BigModelPath = UKismetSystemLibrary::GetProjectDirectory() + "Vosk/install/Models/vosk-model-cn-0.22";

	// Path of the small language model; defaults to a folder below the project directory.
	UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="AI语言小模型路径"))
	FString SmallModelPath = UKismetSystemLibrary::GetProjectDirectory() + "Vosk/install/Models/vosk-model-small-cn-0.22";

	// Path of the recognition server executable launched in BeginPlay.
	UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="识别服务器程序"))
	FString VoskServerExe = UKismetSystemLibrary::GetProjectDirectory() + "Vosk/install/asr_server.exe";

	// Address passed to Initialize when connecting to the recognition server.
	UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="识别服务器IP"))
	FString VoskServerIP = "127.0.0.1";

	// Port passed to Initialize when connecting; the editor clamps it to 1024-65535.
	UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="识别服务器端口", ClampMin = "1024", ClampMax = "65535"))
	int32 VoskServerPort = 25565;

	UPROPERTY(BlueprintReadWrite,category = "AI")// recognition server configuration
	FVoskServerParameters serverconfig;

	UPROPERTY(BlueprintReadOnly,category = "AI")// handle of the recognition server process
	FProcessHandleWrapper ProcessHandleVosk;

	// Starts capturing; forwards iDeviceNameIn to BeginCapture.
	UFUNCTION(BlueprintCallable,category = "AI",meta = (displayName = "开始识别"))
	void Start(FString iDeviceNameIn);

	// Stops capturing; forwards both output parameters to FinishCapture.
	UFUNCTION(BlueprintCallable,category = "AI",meta = (displayName = "停止识别"))
	void End(TArray<uint8>& CaptureData,int32& SamplesRecorded);

protected:
	// Handle of the one-shot timer that delays the connection to the server.
	FTimerHandle DelayTimeHandle;
	// Schedules InitlaziVosk to run once after 5 seconds.
	void DelayTimmer();
	// Calls DelayTimmer unless IsInitialized() already reports true.
	void initlazi();
	// Calls Initialize with VoskServerIP / VoskServerPort and clears the delay timer.
	void InitlaziVosk();

	// Builds the server command line, launches the server process and schedules the connection.
	virtual void BeginPlay() override;
	// Stops the recognition server process for selected end-play reasons.
	virtual void EndPlay(const EEndPlayReason::Type EndPlayReason) override;

public:	
	// Forwards to the base class implementation.
	virtual void TickComponent(float DeltaTime, ELevelTick TickType, FActorComponentTickFunction* ThisTickFunction) override;
};

KTTKComponent.cpp中具体实现：

#include "KTTKComponent.h"

// Constructor: allows this component to tick.
UKTTKComponent::UKTTKComponent()
{
	// NOTE(review): TickComponent below only forwards to Super; presumably the base
	// UVoskComponent needs ticking - confirm before disabling.
	PrimaryComponentTick.bCanEverTick = true;
}

void UKTTKComponent::BeginPlay()
{
	Super::BeginPlay();

	if(UseBigModel==true)//Use Chinese Big Model or Small Model(使用中文语言大模型/小模型)
	{
		ModelPath = BigModelPath;
	}else{ModelPath = SmallModelPath;};
	FString FullPathOfProgramToRun = VoskServerExe;//服务器执行程序路径 

	serverconfig.PathToModel = ModelPath;//设置语言模型路径

	CommandLineArgs = BuildServerParameters(serverconfig,BuildVoskSucess);//创建执行命令

	CreateProcessV(ProcessHandleVosk,FullPathOfProgramToRun,CommandLineArgs,false,true,0);//创建执行识别进程

	initlazi();//进行识别服务器初始化
}

void UKTTKComponent::DelayTimmer()
{
	GetWorld()->GetTimerManager().SetTimer(DelayTimeHandle,this,&UKTTKComponent::InitlaziVosk,5.0f,false);
}

// Schedules the delayed connection unless the recognition service is already initialized.
void UKTTKComponent::initlazi()
{
	if (IsInitialized())
	{
		// Already connected: nothing to schedule.
		return;
	}

	// Connect to the recognition server after the 5 second delay.
	DelayTimmer();
}

void UKTTKComponent::InitlaziVosk()
{
	Initialize(VoskServerIP,VoskServerPort);
	GetWorld()->GetTimerManager().ClearTimer(DelayTimeHandle);//清除定时handle
}

// Starts recognition by forwarding the capture device name to BeginCapture.
void UKTTKComponent::Start(FString iDeviceNameIn)
{
	BeginCapture(iDeviceNameIn);// start recording speech
}

// Stops recognition by forwarding both output parameters to FinishCapture.
void UKTTKComponent::End(TArray<uint8>& CaptureData,int32& SamplesRecorded)
{
	FinishCapture(CaptureData,SamplesRecorded);// stop recording speech
}

/**
 * Cancels the pending delayed connection and stops the recognition server process.
 *
 * BeginPlay launches a server process every time it runs, so the process is stopped
 * for every end-play reason. Previously it was only stopped for Quit, EndPlayInEditor
 * and Destroyed, which left the external server running after a level transition or
 * when the owning level was removed from the world.
 *
 * @param EndPlayReason Why play is ending; forwarded to the base class.
 */
void UKTTKComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)
{
	Super::EndPlay(EndPlayReason);

	// Make sure the delayed InitlaziVosk call cannot fire after play has ended.
	if (UWorld* World = GetWorld())
	{
		World->GetTimerManager().ClearTimer(DelayTimeHandle);
	}

	// Stop the recognition server started in BeginPlay.
	KillProcess(ProcessHandleVosk);
}

// Per-frame tick: forwards to the base class implementation and adds no work of its own.
void UKTTKComponent::TickComponent(float DeltaTime, ELevelTick TickType, FActorComponentTickFunction* ThisTickFunction)
{
	Super::TickComponent(DeltaTime, TickType, ThisTickFunction);
}