服务器端代码:
#-*- coding:utf-8 -*-
from llm import get_answer
from urllib.request import urlretrieve
from config import vits_predict, vist_host, samplerate
import requests
from pydub import AudioSegment
import json
import queue
import sounddevice as sd
from vosk import Model, KaldiRecognizer
import sys
# Socket
import socket
# Address the server listens on: loopback interface, TCP port 9000.
ip_port = ('127.0.0.1', 9000)
# IPv4 TCP socket; it is bound and switched to listening mode at import time,
# so the __main__ block only has to call s.accept().
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM )
s.bind(ip_port)
s.listen(5)
# Voice Recognition
# Queue of raw audio blocks: callback() puts into it, voice_input() reads from it.
q = queue.Queue()
def int_or_str(text):
    """Helper function for argument parsing.

    Args:
        text: The raw argument value, normally a string.

    Returns:
        ``int(text)`` when the value parses as an integer, otherwise
        ``text`` itself, unchanged.
    """
    try:
        return int(text)
    except ValueError:
        # Not an integer literal: hand the original value back to the caller.
        return text
def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block.

    Copies the block into the module-level queue ``q`` so that
    ``voice_input`` can consume it.

    Args:
        indata: Buffer holding the recorded audio block.
        frames: Unused here.
        time: Unused here.
        status: Stream status; printed to stderr when truthy.
    """
    if status:
        print(status, file=sys.stderr)
    # bytes() takes a copy, so the queued data does not alias the
    # stream's buffer once this callback returns.
    q.put(bytes(indata))
def voice_input(*, use_microphone=False):
    """Return the user's question as text.

    The original body started with an unconditional ``return "222"``, which
    made the whole speech recogniser below it unreachable. That placeholder
    is kept as the default so existing callers behave exactly as before; the
    recogniser is now reachable by opting in.

    Args:
        use_microphone: When False (default) return the fixed placeholder
            ``"222"`` without touching any audio device. When True, record
            from the default input device and return the first non-empty
            recognised phrase.

    Returns:
        The placeholder string, or the recognised text with all whitespace
        removed.
    """
    if not use_microphone:
        # NOTE(review): placeholder left in by the author, presumably for
        # testing without a microphone - confirm before enabling the
        # recogniser by default.
        return "222"

    model = Model(lang="cn")
    print("You:")
    with sd.RawInputStream(samplerate=samplerate, blocksize=8000,
                           device=sd.default.device, dtype="int16",
                           channels=1, callback=callback):
        rec = KaldiRecognizer(model, samplerate)
        while True:
            # Blocks until callback() has queued the next audio block.
            data = q.get()
            if not rec.AcceptWaveform(data):
                # No result is read until AcceptWaveform() reports true.
                continue
            text = str(json.loads(rec.Result())['text'])
            # Drop every whitespace character from the recognised text.
            text = ''.join(text.split())
            if text:
                return text
def generate_sound(input_str):
    """Ask the VITS service to synthesise ``input_str`` and save the audio.

    Posts a prediction request to ``vits_predict``. When the service answers
    with ``"Success"``, the generated file is downloaded from ``vist_host``
    and written to ``output.wav`` in the current working directory.

    Args:
        input_str: The text to synthesise.

    Returns:
        True when ``output.wav`` was written, False when the service did not
        report success (in which case any existing ``output.wav`` is left
        untouched and therefore stale). The original returned None in both
        cases, so callers that ignore the result are unaffected.
    """
    payload = {
        "fn_index": 0,
        # Positional arguments for the remote predict function.
        # NOTE(review): the numeric values are presumably synthesis
        # parameters defined by the VITS web UI - confirm against the
        # service.
        "data": [
            input_str,
            "hutao_zh",
            0.2,
            0.6,
            0.8,
            1,
            "ZH",
        ],
    }
    resp = requests.post(vits_predict, data=json.dumps(payload))
    result = resp.json()
    data = result["data"]
    if data[0] != "Success":
        # Report the failure instead of silently doing nothing, so a stale
        # output.wav is not mistaken for the new answer.
        print("generate_sound: synthesis failed: " + repr(data[0]),
              file=sys.stderr)
        return False
    fname = data[1]["name"]
    file_url = vist_host + "file=" + fname
    save_fname = "output.wav"
    urlretrieve(url=file_url, filename=save_fname)
    return True
if __name__ == "__main__":
    # Serve a single UI client: block until it connects.
    client, client_addr = s.accept()
    print("Sockets server runing!")
    # The with-block closes the connection on every exit path, including
    # exceptions raised while answering.
    with client:
        # The first message selects the input mode: 0 = keyboard, 1 = voice.
        inputMethod = int(client.recv(1024).decode())
        if inputMethod == 0:
            print("键盘输入")
        elif inputMethod == 1:
            print("语音输入")
        else:
            # Fail early with a clear message; previously this surfaced later
            # as a NameError on the undefined variable `question`.
            raise ValueError("unsupported input method: " + str(inputMethod))

        while True:
            if inputMethod == 0:
                # Read one question. There is no length prefix, so a read
                # shorter than the buffer size is treated as end of message.
                chunks = []
                while True:
                    data = client.recv(1024)
                    chunks.append(data)
                    if len(data) < 1024:
                        break
                total_data = b"".join(chunks)
                if not total_data:
                    # recv() returned b"": the client closed the connection.
                    break
                question = total_data.decode()
            else:
                question = voice_input()
                # Echo the recognised text so the UI can display it.
                client.sendall(question.encode())
            print("接受到的提问: " + question)

            answer = get_answer(question)
            generate_sound(answer)  # synthesise the answer into ./output.wav

            # Convert wav to ogg at the path the UI plays from.
            src = "./output.wav"
            dst = "G:/renpy_wife/girlfriend_hutao/game/audio/test.ogg"
            sound = AudioSegment.from_wav(src)
            sound.export(dst, format="ogg")

            # Send the response text to the UI. sendall() retries until the
            # whole payload is written, which send() does not guarantee.
            client.sendall(answer.encode())

            # Wait for the UI's "finished playing" acknowledgement.
            ack = client.recv(1024)
            if not ack:
                # Client disconnected instead of acknowledging.
                break
            print(ack.decode())
客户端代码(测试):
import os
import socket
import time
# Define the server address and port
server_address = ('127.0.0.1', 9000)
# Create a TCP/IP socket
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Connect the socket to the server
client_socket.connect(server_address)
try:
    # Send input method choice to the server
    input_method = input("Enter input method (0 for keyboard, 1 for voice): ")
    client_socket.sendall(input_method.encode())
    while True:
        print(1)
        if input_method == '0':  # Keyboard input
            question = input("Your qestion: ")
            if not question:
                # An empty line would send zero bytes and leave both sides
                # blocked in recv(); ask again instead.
                continue
            # Send the question to the server
            client_socket.sendall(question.encode())
        elif input_method == '1':  # Voice input
            # In voice mode the server obtains the question itself and sends
            # it back before the answer.
            question = client_socket.recv(1024).decode()
            if not question:
                break
            print("Received question: " + question)
        # Receive the answer text from the server
        data = client_socket.recv(1024).decode()
        if not data:
            # Empty read: the server closed the connection.
            break
        print(data)
        # Play audio file
        audio_file_path = "G:/renpy_wife/girlfriend_hutao/game/audio/test.ogg"
        os.system("start " + audio_file_path)
        # Tell the server this round is finished; it blocks on this
        # acknowledgement before handling the next question.
        client_socket.sendall("over".encode())
finally:
    # Clean up the connection
    client_socket.close()
以下是代码解释(为了预防作者忘记):
服务端不需要看其他函数,只需要看main函数,主要看思路
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM )是创建了一个套接字对象,s.bind()将ip和端口绑定到创建的套接字上,使得服务器可以监听该地址和端口。
看主函数:
服务器:client, client_addr = s.accept() # 等待客户端连接
客户端:client_socket.connect(server_address) # 连接服务器
#服务器:
inputMethod = int(client.recv(1024).decode())
#等待接收客户端信息
客户端:
input_method = input("Enter input method (0 for keyboard, 1 for voice): ")
client_socket.sendall(input_method.encode())
输入0或1并发送到服务器
服务器:
data = client.recv(1024)
如果是0,则等待接收客户端的信息
客户端:
question = input("Your qestion: ")
# Send the question to the server
client_socket.sendall(question.encode())
输入内容并发送服务器
如果是选择音频输入,可以选择用whisper v3接收语音并转文字。
文字转语音可用sovits技术。最后生成的音频保存为当前目录下的 output.wav 文件。
src = "./output.wav"
dst = "G:/renpy_wife/girlfriend_hutao/game/audio/test.ogg"
sound = AudioSegment.from_wav(src)
sound.export(dst, format="ogg") # 音频段对象导出为 OGG 格式的文件,并保存到指定的目标路径 dst。
# send response to UI
# print(answer.encode())
client.send(answer.encode())
# finish playing audio
print(client.recv(1024).decode())
服务器先将生成的音频转成ogg格式,再把回答文本发送给客户端,然后通过 print(client.recv(1024).decode()) 等待客户端播放音频后返回的结束信息。
客户端播放音频:
audio_file_path = "G:/renpy_wife/girlfriend_hutao/game/audio/test.ogg"
os.system("start " + audio_file_path)
client_socket.sendall("over".encode())