1 Reference
This integration is built on top of the open-source project by Bilibili uploader 阴沉的怪咖.
Open-source AI anime waifu project (unity-AI-Chat-Toolkit):
GitHub: https://github.com/zhangliwei7758/unity-AI-Chat-Toolkit
Gitee: https://gitee.com/DammonSpace/unity-ai-chat-toolkit
2 Official documentation
The official docs have no C# example, so I ported the Python sample to C#.
Official page for Alibaba's omni-modal (Qwen-Omni) model: Alibaba Cloud Model Studio (阿里云百炼)
3 Voice input
Add the following code to the LLM base class:
//New field: cached list of omni-modal messages to send (used by RequestAudio in the subclass)
public List<SendDataAudio> m_DataAudioList = new List<SendDataAudio>();
public virtual void PostMsgAudio(string base64Audio, Action<string> _callback, Action<bool> _endCallBack = null, Action<AudioClip> _AudioCallBack = null)
{
    //Trim the conversation history to the configured length
    CheckHistory();
    //Build the prompt
    string message = "当前为角色的人物设定:" + m_Prompt +
    " 回答的语言:" + lan;
    //Cache the outgoing message
    Content content = new Content()
    {
        type = "input_audio",
        input_audio = new Input_audio()
        {
            data = string.Format("data:;base64,{0}", base64Audio),
            //The clip is encoded with WavUtility.FromAudioClip in section 5, so declare it as wav, not mp3
            format = "wav"
        }
    };
    Content content2 = new Content()
    {
        type = "text",
        text = message
    };
    Content[] contents = new Content[] { content, content2 };
    m_DataAudioList.Add(new SendDataAudio("user", contents));
    StartCoroutine(RequestAudio(message, _callback, _endCallBack, _AudioCallBack));
}
public virtual IEnumerator RequestAudio(string _postWord, System.Action<string> _callback, Action<bool> _endCallBack = null, Action<AudioClip> _AudioCallBack = null)
{
    yield return new WaitForEndOfFrame();
}
[Serializable]
public class SendDataAudio
{
    [SerializeField] public string role;
    [SerializeField] public Content[] content;
    public SendDataAudio() { }
    public SendDataAudio(string _role, Content[] _content)
    {
        role = _role;
        content = _content;
    }
}
[Serializable]
public class Content
{
    [SerializeField] public string type;
    [SerializeField] public Input_audio input_audio;
    [SerializeField] public string text;
}
[Serializable]
public class Input_audio
{
    [SerializeField] public string data;
    [SerializeField] public string format;
}
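As a sanity check, here is a small sketch (my own addition, not from the original project) that builds one user message and logs the JSON that JsonUtility will actually send. One caveat of JsonUtility is that it cannot omit unused fields, so both Content entries serialize all of their members; check this first if the endpoint rejects requests.
//Hypothetical helper (my own, not part of the toolkit): build one user
//message and log the JSON that JsonUtility actually produces.
void DebugLogUserMessage(string base64String)//base64String: base64 of WAV bytes, see section 5
{
    Content audioPart = new Content()
    {
        type = "input_audio",
        input_audio = new Input_audio()
        {
            data = "data:;base64," + base64String,//same data-URL prefix as PostMsgAudio
            format = "wav"
        }
    };
    Content textPart = new Content() { type = "text", text = "你好" };
    SendDataAudio msg = new SendDataAudio("user", new Content[] { audioPart, textPart });
    //JsonUtility serializes every public field, so the text part also carries an
    //empty input_audio object and the audio part an empty text string.
    Debug.Log(JsonUtility.ToJson(msg));
}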
4 Audio parsing
Add a new class, AliQwenOmniChat, that inherits from LLM:
using Newtonsoft.Json.Linq;
using System.Collections.Generic;
using System;
using UnityEngine;
using UnityEngine.Networking;
using System.Collections;
using static ALiChat;
public class AliQwenOmniChat : LLM
{
    public AliQwenOmniChat()
    {
        url = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions";
    }
    /// <summary>
    /// API key
    /// </summary>
    [SerializeField] private string api_key;
    /// <summary>
    /// AI persona (system prompt)
    /// </summary>
    public string m_SystemSetting = string.Empty;
    /// <summary>
    /// Model name (qwen-omni-turbo-0119)
    /// </summary>
    public string m_gptModel = "qwen-omni-turbo-0119";
    [Header("设置说话的声音")] public SpeekerRole per = SpeekerRole.Cherry;
    private void Start()
    {
        //Add the AI persona as the system message at runtime
        m_DataList.Add(new SendData("system", m_SystemSetting));
    }
    /// <summary>
    /// Send a message (base64-encoded audio)
    /// </summary>
    public override void PostMsgAudio(string _msg, Action<string> _callback, Action<bool> endAction, Action<AudioClip> AudioAction)
    {
        base.PostMsgAudio(_msg, _callback, endAction, AudioAction);
    }
    public override IEnumerator RequestAudio(string requestData, Action<string> callback, Action<bool> EndAction, Action<AudioClip> AudioAction)
    {
        using (var request = new UnityWebRequest(url, "POST"))
        {
            PostDataAudio _postData = new PostDataAudio
            {
                model = m_gptModel,
                stream = this.stream,//the "data:" parsing below assumes streaming output
                messages = m_DataAudioList,
                temperature = 1,
                top_p = 0.7f,
                modalities = new string[] { "text", "audio" },//request both text and speech
                audio = new Audio { voice = SetSpeeker(per), format = "wav" },
                stream_options = new Stream_options { include_usage = true },
            };
            string _jsonText = JsonUtility.ToJson(_postData).Trim();
            Debug.Log(_jsonText);
            byte[] data = System.Text.Encoding.UTF8.GetBytes(_jsonText);
            request.uploadHandler = (UploadHandler)new UploadHandlerRaw(data);
            //DownloadHandlerBuffer collects the whole response before the coroutine resumes,
            //so the streamed chunks are only parsed after the stream has finished
            request.downloadHandler = (DownloadHandler)new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("Authorization", string.Format("Bearer {0}", api_key));
            yield return request.SendWebRequest();
            if (request.result == UnityWebRequest.Result.ConnectionError || request.result == UnityWebRequest.Result.ProtocolError)
            {
                Debug.LogError("Alibaba API error: " + request.error);
                callback?.Invoke("阿里大模型出现点问题");
                yield break;
            }
            string temp = request.downloadHandler.text;
            //The body is a sequence of SSE events, each prefixed with "data:"
            var datas = temp.Split("data:");
            string textStr = "";
            string audioStr = "";
            foreach (var requestJson in datas)
            {
                string chunk = requestJson.Trim();
                //Skip empty fragments and the "[DONE]" terminator, which is not JSON
                if (string.IsNullOrEmpty(chunk) || chunk == "[DONE]")
                    continue;
                Debug.Log(chunk);
                var jsonP = JToken.Parse(chunk);
                //With include_usage = true, the final chunk carries an empty choices array
                var choices = jsonP["choices"] as JArray;
                if (choices == null || choices.Count == 0)
                    continue;
                var item = choices[0];
                var audio = item["delta"]?.SelectToken("audio");
                if (audio != null)
                {
                    var transcript = audio.SelectToken("transcript");
                    if (transcript != null)
                    {
                        var tt = transcript.ToString();//text part
                        if (!string.IsNullOrEmpty(tt))
                        {
                            textStr += tt.Trim();
                        }
                    }
                    else
                    {
                        audioStr += audio.SelectToken("data")?.ToString();//audio part
                    }
                }
                var finish = item.SelectToken("finish_reason");
                if (finish != null && finish.Type == JTokenType.String && finish.ToString() == "stop")
                {
                    break;//last content chunk
                }
            }
            if (!string.IsNullOrEmpty(textStr))
            {
                callback?.Invoke(textStr);
            }
            if (!string.IsNullOrEmpty(audioStr))
            {
                AudioAction?.Invoke(PlayAudio(audioStr));
            }
            EndAction?.Invoke(true);
        }
    }
    //Decode the base64-encoded audio data returned by the model into an AudioClip
    AudioClip PlayAudio(string audioString)
    {
        if (!string.IsNullOrEmpty(audioString))
        {
            byte[] audioBytes = Convert.FromBase64String(audioString);
            //The returned audio is 24 kHz PCM, hence the 24000 sample rate
            AudioClip audioClip = WavUtility.ConvertBytesToAudioClip(audioBytes, 24000);
            return audioClip;
        }
        return null;
    }
    //The four voices Alibaba currently supports
    private string SetSpeeker(SpeekerRole _role)
    {
        if (_role == SpeekerRole.Cherry) return "Cherry";
        if (_role == SpeekerRole.Serena) return "Serena";
        if (_role == SpeekerRole.Ethan) return "Ethan";
        if (_role == SpeekerRole.Chelsie) return "Chelsie";
        return "Cherry";//default voice
    }
    #region Request payload types
    [Serializable]
    public class PostDataAudio
    {
        [SerializeField] public string model;
        [SerializeField] public bool stream;
        [SerializeField] public List<SendDataAudio> messages;
        [SerializeField] public float temperature = 0.7f;
        [SerializeField] public float top_p;
        [SerializeField] public string[] modalities;
        [SerializeField] public Audio audio;
        [SerializeField] public Stream_options stream_options;
    }
    [Serializable]
    public class Audio
    {
        public string voice;
        public string format;
    }
    [Serializable]
    public class Stream_options
    {
        public bool include_usage;
    }
    #endregion
    public enum SpeekerRole
    {
        Cherry,
        Serena,
        Ethan,
        Chelsie
    }
}
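One limitation worth noting: because RequestAudio uses DownloadHandlerBuffer, the coroutine only resumes once the whole stream has arrived, so nothing plays until the model finishes. Below is a minimal sketch, entirely my own addition and untested against the toolkit, of a DownloadHandlerScript that surfaces each "data:" payload as it arrives; its callback could be fed into the same JToken parsing as above.
using System;
using System.Text;
using UnityEngine.Networking;
//Sketch: a streaming download handler that emits each SSE "data:" payload
//as soon as it is received, instead of buffering the whole response.
public class SseDownloadHandler : DownloadHandlerScript
{
    private readonly Action<string> _onChunk;
    private readonly StringBuilder _pending = new StringBuilder();

    public SseDownloadHandler(Action<string> onChunk) : base(new byte[8192])
    {
        _onChunk = onChunk;
    }

    protected override bool ReceiveData(byte[] data, int dataLength)
    {
        _pending.Append(Encoding.UTF8.GetString(data, 0, dataLength));
        string text = _pending.ToString();
        int sep;
        //Complete SSE events are terminated by a blank line
        while ((sep = text.IndexOf("\n\n", StringComparison.Ordinal)) >= 0)
        {
            string evt = text.Substring(0, sep).Trim();
            text = text.Substring(sep + 2);
            if (evt.StartsWith("data:"))
                _onChunk?.Invoke(evt.Substring(5).Trim());//caller must still skip "[DONE]"
        }
        _pending.Clear();
        _pending.Append(text);
        return true;//keep receiving
    }
}
To use it you would assign request.downloadHandler = new SseDownloadHandler(...) and poll inside the coroutine while !request.isDone, which is a larger change than the buffered version above.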
5 Testing
To use voice input, find the AcceptClip method (the recording-finished handler) in the open-source project and change it to:
public bool AliQwenOmniChat = false;//use the Alibaba omni-modal voice path
private Queue<string> strDatas = new Queue<string>();//queued text chunks
private Queue<AudioClip> clipDatas = new Queue<AudioClip>();//queued audio replies
private bool end = true;
private void AcceptClip(AudioClip _audioClip)
{
    if (m_ChatSettings.m_SpeechToText == null)
        return;
    if (AliQwenOmniChat)//Alibaba omni-modal voice input
    {
        //Encode the recorded clip as WAV, then base64, and send it straight to the model
        byte[] _audioData = WavUtility.FromAudioClip(_audioClip);
        string base64String = Convert.ToBase64String(_audioData);
        m_ChatSettings.m_ChatModel.PostMsgAudio(base64String, CallBack, EndCallBack, AudioCallBack);
        m_InputWord.text = "阿里语音输入完成";
    }
    else
    {
        //Original path: speech-to-text first, then send the text
        m_ChatSettings.m_SpeechToText.SpeechToText(_audioClip, DealingTextCallback);
    }
}
private void EndCallBack(bool isComplete)
{
    Debug.Log("Reply finished: " + isComplete);
    this.end = isComplete;
}
private void CallBack(string _response)//text callback
{
    _response = _response.Trim();
    //m_TextBack.text = "";
    //Debug.Log("AI reply: " + _response);
    if (GetMesssgeIndex == 0)
    {
        m_TextBack2.text = "";
        //Switch to the talking animation
        Debug.Log("Start playing audio******");
        m_TextBack.text = "";
        SetAnimator("state", 2);
    }
    GetMesssgeIndex++;
    if (!string.IsNullOrEmpty(_response))
    {
        if (AliQwenOmniChat)
            //The Alibaba omni model returns speech itself, so just queue the text
            strDatas.Enqueue(_response);
        else
            m_ChatSettings.m_TextToSpeech.Speak(_response, PlayAudio);
    }
}
//Callback that receives the returned audio
private void AudioCallBack(AudioClip clip)
{
    clipDatas.Enqueue(clip);
}
private void Update()
{
    if (AliQwenOmniChat)
    {
        //Drain the text queue when the typewriter effect is idle
        if (strDatas.Count > 0 && m_WriteState == false)
        {
            StartTypeWords(strDatas.Dequeue());
        }
        //Drain the audio queue when nothing is playing
        if (clipDatas.Count > 0 && m_AudioSource.isPlaying == false)
        {
            m_AudioSource.clip = clipDatas.Dequeue();
            m_AudioSource.Play();//play the returned speech
            isEnd = false;
        }
        else if (m_AudioSource.isPlaying == false && this.end)
        {
            if (isEnd)
            {
                return;
            }
            isEnd = true;
            m_ChatHistory.Add(m_TextBack.text);
            m_AudioSource.Stop();
            resultDatas.Clear();
            GetMesssgeIndex = 0;
            //Switch back to the idle animation
            Debug.Log("Switch back to the idle animation");
            SetAnimator("state", 0);
        }
    }
}
Text input works exactly as it did in the original open-source project.
Voice input test:
I asked by voice: "你叫什么名字?" (What's your name?)
Console log of the request (screenshot in the original post):
After the voice input, both text and audio came back. Console log of the response (screenshot in the original post):
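Since the screenshots do not reproduce here, the following is an illustrative sketch of the chunk shapes the parser above consumes. The field names come from the parsing code; the values and ordering are made up:
data: {"choices":[{"delta":{"audio":{"transcript":"我叫……"}},"finish_reason":null}]}
data: {"choices":[{"delta":{"audio":{"data":"<base64 PCM>"}},"finish_reason":null}]}
data: {"choices":[{"delta":{},"finish_reason":"stop"}]}
data: {"choices":[],"usage":{…}}
data: [DONE]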