前言:该UE5.1项目实现了离线实时语音转文字并朗读输出结果的功能,能作为一个实现参考。
1.准备工作:
-
下载我打包好的插件、环境、模型:baiduyun,更多模型下载
-
安装UE5.1引擎,VS开发环境&编译器:Epic Launcher,Microsoft VS
-
新建空白C++项目后关闭引擎,并打开项目文件夹:
-
项目文件夹中放入下载的插件与语言模型:
1.项目目录/Plugins文件夹中放入下载解压好的VoskPlugin插件
2.项目目录/Vosk文件夹中放入下载解压好的语言服务器
3.项目目录/Vosk/install/Models文件夹中放入解压好的大、小中文模型
2.各项设置:
-
修改配置,项目支持语音:
项目目录/Config文件夹中修改DefaultEngine.ini,末尾添加配置项
-
[Voice]
bEnabled=true
[SystemSettings]
voice.SilenceDetectionThreshold=0.01
-
打开项目并在菜单栏-工具-新建C++类KTTKComponent(继承自VoskComponent)到Vosk插件中:
-
KTTKComponent.h中声明服务器初始化、各项默认配置、开启/关闭识别函数:
#pragma once
#include "CoreMinimal.h"
#include "Components/ActorComponent.h"
#include "VoskComponent.h"
#include "VoskServerParameters.h"
#include "ProcessHandleWrapper.h"
#include "Engine/World.h"
#include "TimerManager.h"
#include "Kismet/KismetSystemLibrary.h"
#include "KTTKComponent.generated.h"
/**
 * Speech-recognition component derived from UVoskComponent.
 *
 * In BeginPlay it builds a command line from `serverconfig`, starts the recognition server
 * executable as a separate process, and schedules a delayed call to Initialize() with
 * VoskServerIP / VoskServerPort. Start() and End() forward to the capture functions, and
 * EndPlay() stops the server process.
 */
UCLASS( ClassGroup=(Custom), meta=(BlueprintSpawnableComponent) )
class VOSKPLUGIN_API UKTTKComponent : public UVoskComponent
{
GENERATED_BODY()
public:
// Default constructor; the definition enables ticking for this component.
UKTTKComponent();
public:
// Model path actually used; BeginPlay assigns BigModelPath or SmallModelPath depending on UseBigModel.
FString ModelPath;
// Passed to BuildServerParameters() in BeginPlay.
// NOTE(review): presumably reports whether the arguments were built successfully - confirm against VoskPlugin.
bool BuildVoskSucess = false;
// Value returned by BuildServerParameters(); handed to CreateProcessV() when the server is started.
TArray<FString> CommandLineArgs;
// Selects BigModelPath (true) or SmallModelPath (false) in BeginPlay.
UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="使用AI语言大模型"))
bool UseBigModel = false;
// Path of the large language model; defaults to a folder below the project directory.
UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="AI语言大模型路径"))
FString BigModelPath = UKismetSystemLibrary::GetProjectDirectory() + "Vosk/install/Models/vosk-model-cn-0.22";
// Path of the small language model; defaults to a folder below the project directory.
UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="AI语言小模型路径"))
FString SmallModelPath = UKismetSystemLibrary::GetProjectDirectory() + "Vosk/install/Models/vosk-model-small-cn-0.22";
// Path of the recognition server executable started in BeginPlay.
UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="识别服务器程序"))
FString VoskServerExe = UKismetSystemLibrary::GetProjectDirectory() + "Vosk/install/asr_server.exe";
// Address passed to Initialize() when connecting to the recognition server.
UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="识别服务器IP"))
FString VoskServerIP = "127.0.0.1";
// Port passed to Initialize(); the editor clamps it to 1024-65535.
UPROPERTY(BlueprintReadWrite,EditAnywhere,category = "AI",meta = (displayName="识别服务器端口", ClampMin = "1024", ClampMax = "65535"))
int32 VoskServerPort = 25565;
UPROPERTY(BlueprintReadWrite,category = "AI")//Recognition server configuration
FVoskServerParameters serverconfig;
UPROPERTY(BlueprintReadOnly,category = "AI")//Handle of the recognition server process
FProcessHandleWrapper ProcessHandleVosk;
// Begins capturing from the given input device (forwards to BeginCapture).
UFUNCTION(BlueprintCallable,category = "AI",meta = (displayName = "开始识别"))
void Start(FString iDeviceNameIn);
// Finishes capturing (forwards to FinishCapture); both arguments are passed through by reference.
UFUNCTION(BlueprintCallable,category = "AI",meta = (displayName = "停止识别"))
void End(TArray<uint8>& CaptureData,int32& SamplesRecorded);
protected:
// Handle of the one-shot timer that delays the call to InitlaziVosk().
FTimerHandle DelayTimeHandle;
// Arms DelayTimeHandle so that InitlaziVosk() runs once after 5 seconds.
void DelayTimmer();
// Schedules the delayed initialization unless IsInitialized() already returns true.
void initlazi();
// Timer callback: calls Initialize() with VoskServerIP / VoskServerPort and clears the timer.
void InitlaziVosk();
// Starts the server process and schedules the delayed initialization.
virtual void BeginPlay() override;
// Stops the server process for selected end-play reasons.
virtual void EndPlay(const EEndPlayReason::Type EndPlayReason) override;
public:
// Per-frame tick; the definition only forwards to the base class.
virtual void TickComponent(float DeltaTime, ELevelTick TickType, FActorComponentTickFunction* ThisTickFunction) override;
};
-
KTTKComponent.cpp中具体实现:
#include "KTTKComponent.h"
// Constructs the component with ticking enabled.
UKTTKComponent::UKTTKComponent()
{
// Allow TickComponent() to be called for this component.
PrimaryComponentTick.bCanEverTick = true;
}
void UKTTKComponent::BeginPlay()
{
Super::BeginPlay();
if(UseBigModel==true)//Use Chinese Big Model or Small Model(使用中文语言大模型/小模型)
{
ModelPath = BigModelPath;
}else{ModelPath = SmallModelPath;};
FString FullPathOfProgramToRun = VoskServerExe;//服务器执行程序路径
serverconfig.PathToModel = ModelPath;//设置语言模型路径
CommandLineArgs = BuildServerParameters(serverconfig,BuildVoskSucess);//创建执行命令
CreateProcessV(ProcessHandleVosk,FullPathOfProgramToRun,CommandLineArgs,false,true,0);//创建执行识别进程
initlazi();//进行识别服务器初始化
}
void UKTTKComponent::DelayTimmer()
{
GetWorld()->GetTimerManager().SetTimer(DelayTimeHandle,this,&UKTTKComponent::InitlaziVosk,5.0f,false);
}
/**
 * Schedules the delayed initialization of the recognition service.
 * Does nothing when IsInitialized() already reports true.
 */
void UKTTKComponent::initlazi()
{
	// Already initialized: nothing to schedule.
	if (IsInitialized())
	{
		return;
	}

	// Run the initialization after the 5-second delay.
	DelayTimmer();
}
void UKTTKComponent::InitlaziVosk()
{
Initialize(VoskServerIP,VoskServerPort);
GetWorld()->GetTimerManager().ClearTimer(DelayTimeHandle);//清除定时handle
}
// Starts recognition by forwarding the given input device name to BeginCapture().
void UKTTKComponent::Start(FString iDeviceNameIn)
{
BeginCapture(iDeviceNameIn);//Start recording speech
}
// Stops recognition by forwarding both reference arguments to FinishCapture().
// NOTE(review): CaptureData / SamplesRecorded are presumably filled by FinishCapture - confirm against VoskPlugin.
void UKTTKComponent::End(TArray<uint8>& CaptureData,int32& SamplesRecorded)
{
FinishCapture(CaptureData,SamplesRecorded);//Stop recording speech
}
/**
 * Stops the recognition server process and cancels the pending delayed initialization.
 *
 * The server is started unconditionally in BeginPlay, so it is stopped for every end-play
 * reason. Previously LevelTransition and RemovedFromWorld left the process running, and the
 * next BeginPlay launched a second server next to the orphaned one.
 *
 * @param EndPlayReason Why play is ending; forwarded to the base class.
 */
void UKTTKComponent::EndPlay(const EEndPlayReason::Type EndPlayReason)
{
	// Cancel a delayed InitlaziVosk() call that has not fired yet, so it cannot run after
	// play has ended.
	if (UWorld* World = GetWorld())
	{
		World->GetTimerManager().ClearTimer(DelayTimeHandle);
	}

	Super::EndPlay(EndPlayReason);

	// Stop the recognition server regardless of the reason play is ending.
	KillProcess(ProcessHandleVosk);
}
// Per-frame tick; adds no behavior of its own and only forwards to the base class.
void UKTTKComponent::TickComponent(float DeltaTime, ELevelTick TickType, FActorComponentTickFunction* ThisTickFunction)
{
Super::TickComponent(DeltaTime, TickType, ThisTickFunction);
}
3.编译并测试:
-
生成并打开该项目:
-
蓝图中创建游戏模式GM_STTS,玩家控制器PC_STTS:
1.该游戏模式中选定该玩家控制器,并将该游戏模式设置到当前地图的世界场景设置;
2.玩家控制器中添加组件:KTTK,设置默认配置值(是否使用大的语言模型,模型自定义路径,本机或远程语言服务器IP端口),实现测试蓝图逻辑;
-
电脑插入麦克风,运行关卡,按下测试按键即可测试回调输出打印字符。
-
输出最终识别结果时朗读结果:蓝图中的TTSSpeech来自另一插件-TTSPluginMeoPlay(请自行寻找该插件)。