简单的本机服务器端与客户端的通信(为后续搞语音聊天做准备)

最新推荐文章于 2024-05-19 23:31:32 发布

格瑞Lxf

最新推荐文章于 2024-05-19 23:31:32 发布

阅读量205

点赞数 1

文章标签： python

本文链接：https://blog.csdn.net/china_boy007/article/details/136510631

版权

服务器端代码：

#-*- coding:utf-8 -*-
from llm import get_answer
from urllib.request import urlretrieve
from config import vits_predict, vist_host, samplerate
import requests
from pydub import AudioSegment
import json
import queue
import sounddevice as sd
from vosk import Model, KaldiRecognizer
import sys

# Socket
import socket
# Address the server listens on: loopback interface, TCP port 9000.
ip_port = ('127.0.0.1', 9000)
# Module-level listening socket (IPv4, TCP). It is created, bound and put into
# listening mode at import time; the __main__ section calls s.accept() on it.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM )
s.bind(ip_port)
s.listen(5)  # backlog argument of 5

# Voice Recognition
# Queue that callback() fills with raw audio blocks and voice_input() drains.
q = queue.Queue()
def int_or_str(text):
    """Convert ``text`` to an int when possible, otherwise hand it back as-is.

    Intended as an argument-parsing helper: values that ``int()`` rejects
    with ``ValueError`` are returned unchanged.
    """
    try:
        number = int(text)
    except ValueError:
        # Not an integer literal: keep the caller's original value.
        return text
    else:
        return number

def callback(indata, frames, time, status):
    """Audio-stream callback: queue one captured block for the recognizer.

    Per the original author's note this is invoked from a separate thread
    for every audio block. A truthy ``status`` is reported on stderr, then
    the block is copied to ``bytes`` and pushed onto the module-level
    queue ``q``. ``frames`` and ``time`` are accepted but not used.
    """
    if status:
        print(status, file=sys.stderr)
    audio_block = bytes(indata)
    q.put(audio_block)

def voice_input():
    """Return the user's spoken question as text.

    NOTE(review): the first statement returns the fixed string "222", so
    everything after it is unreachable. This looks like a temporary stub
    for exercising the socket flow without a microphone -- confirm with the
    author before removing it.

    The unreachable body opens a raw int16, single-channel input stream
    whose ``callback`` pushes audio blocks onto the module-level queue
    ``q``, feeds each block to a ``KaldiRecognizer`` and returns the first
    non-empty recognised text with all whitespace removed.
    """
    return "222"
    model = Model(lang="cn")
    print("You:")
    with sd.RawInputStream(samplerate = samplerate, blocksize = 8000, device = sd.default.device,
                           dtype="int16", channels=1, callback=callback):

        rec = KaldiRecognizer(model, samplerate)
        while True:
            # Blocks until callback() has queued the next audio block.
            data = q.get()
            if rec.AcceptWaveform(data):
                # rec.Result() is a JSON string; only its 'text' field is used.
                a = json.loads(rec.Result())
                a = str(a['text'])
                # Drop every whitespace character from the recognised text.
                a = ''.join(a.split())
                if(len(a) > 0):
                    user_input = a
                    # Assigned but never read anywhere in this function.
                    stop_flag = 1
                    return user_input

def generate_sound(input_str):
    """Synthesize ``input_str`` to speech and save it as ``output.wav``.

    Posts a JSON payload to the ``vits_predict`` endpoint. When the reply's
    first ``data`` entry is "Success", the generated file is downloaded from
    ``vist_host`` into ``output.wav`` in the current working directory.

    Args:
        input_str: Text to be spoken.

    Returns:
        None. When the service does not report "Success", nothing is
        downloaded and a warning is written to stderr; any ``output.wav``
        left over from an earlier call stays on disk unchanged.
    """
    # The entries after input_str are positional inputs for the remote
    # predict function; their meaning is not visible in this file.
    payload = json.dumps({
        "fn_index": 0,
        "data": [
            input_str,
            "hutao_zh",
            0.2,
            0.6,
            0.8,
            1,
            "ZH",
        ],
    })
    resp = requests.post(vits_predict, data=payload)
    result = json.loads(resp.text)

    if result["data"][0] != "Success":
        # Previously this case was silent, so the caller would convert and
        # replay a stale output.wav without any hint that synthesis failed.
        print("generate_sound: speech synthesis failed, output.wav was not "
              "updated: " + str(result["data"][0]), file=sys.stderr)
        return

    fname = result["data"][1]["name"]
    file_url = vist_host + "file=" + fname
    save_fname = "output.wav"
    urlretrieve(url=file_url, filename=save_fname)

if __name__ == "__main__":
    # Serve exactly one client for the lifetime of the process.
    #
    # Wire protocol (plain UTF-8 text, no framing):
    #   1. client -> server: "0" (keyboard input) or "1" (voice input)
    #   2. per round:
    #        keyboard: client -> server: question
    #        voice:    server -> client: recognised question
    #      server -> client: answer
    #      client -> server: any message once audio playback has finished
    client, client_addr = s.accept()
    print("Sockets server runing!")

    try:
        inputMethod = int(client.recv(1024).decode())
        if inputMethod == 0:
            print("键盘输入")
        elif inputMethod == 1:
            print("语音输入")
        else:
            # Without this check the loop below would fail with a NameError
            # because `question` is never assigned.
            raise ValueError("unsupported input method: %r" % inputMethod)

        while True:
            if inputMethod == 0:
                # There is no message framing: a chunk shorter than the
                # buffer size is treated as the end of the question.
                chunks = []
                while True:
                    data = client.recv(1024)
                    chunks.append(data)
                    if len(data) < 1024:
                        break
                total_data = b"".join(chunks)
                if not total_data:
                    # recv() returns b"" only when the peer has closed the
                    # connection, so stop instead of answering an empty
                    # question forever.
                    print("Client disconnected.")
                    break
                question = total_data.decode()
            else:
                question = voice_input()
                # sendall() keeps sending until the whole message is out;
                # send() may transmit only part of it.
                client.sendall(question.encode())

            print("接受到的提问: " + question)
            answer = get_answer(question)

            generate_sound(answer)  # generate the audio (writes output.wav)

            # convert wav to ogg
            src = "./output.wav"
            dst = "G:/renpy_wife/girlfriend_hutao/game/audio/test.ogg"
            sound = AudioSegment.from_wav(src)
            # Export the audio segment as an OGG file at the target path dst.
            sound.export(dst, format="ogg")

            # send response to UI
            client.sendall(answer.encode())

            # Wait for the client to report that it finished playing audio.
            finished = client.recv(1024)
            if not finished:
                print("Client disconnected.")
                break
            print(finished.decode())
    finally:
        # Release both sockets on every exit path, including errors.
        client.close()
        s.close()

客户端代码（测试）：

import os
import socket
import time

# Test client for the socket server.
#
# Wire protocol expected by the server (plain UTF-8 text, no framing):
#   1. client -> server: "0" (keyboard input) or "1" (voice input)
#   2. per round:
#        keyboard: client -> server: question
#        voice:    server -> client: recognised question
#      server -> client: answer
#      client -> server: "over" once the audio has been started

# Define the server address and port
server_address = ('127.0.0.1', 9000)

# Create a TCP/IP socket
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

try:
    # Connect inside the try block so the socket is closed even when the
    # server is not reachable.
    client_socket.connect(server_address)

    # Send input method choice to the server
    input_method = input("Enter input method (0 for keyboard, 1 for voice): ")
    client_socket.sendall(input_method.encode())

    while True:
        if input_method == '0':  # Keyboard input
            question = input("Your qestion: ")
            if not question:
                # Sending zero bytes would leave both sides waiting on each
                # other, so ask again instead.
                continue
            client_socket.sendall(question.encode())
        elif input_method == '1':  # Voice input
            # The server performs the speech recognition and sends the
            # recognised question before the answer.
            # NOTE(review): with no framing, the question and the answer
            # could arrive merged in one recv() -- confirm this is
            # acceptable for the test client.
            question = client_socket.recv(1024).decode()
            if not question:
                break  # server closed the connection
            print("Received question: " + question)

        # Receive the answer. An empty result means the server closed the
        # connection.
        data = client_socket.recv(1024).decode()
        if not data:
            break
        print(data)

        # Play audio file
        audio_file_path = "G:/renpy_wife/girlfriend_hutao/game/audio/test.ogg"
        os.system("start " + audio_file_path)

        # Tell the server this round is done. The server blocks in recv()
        # until it gets this message, so omitting it deadlocks both sides.
        client_socket.sendall("over".encode())

finally:
    # Clean up the connection
    client_socket.close()

以下是代码解释（为了预防作者忘记）：

服务端不需要看其他函数，只需要看main函数，主要看思路

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM )是创建了一个套接字对象，s.bind()将ip和端口绑定到创建的套接字上，使得服务器可以监听该地址和端口。

看主函数：

服务器：client, client_addr = s.accept()  # 等待客户端连接

客户端：client_socket.connect(server_address) # 连接服务器

#服务器：
inputMethod = int(client.recv(1024).decode())
#等待接收客户端信息

客户端：
input_method = input("Enter input method (0 for keyboard, 1 for voice): ")
client_socket.sendall(input_method.encode())
输入0或1并发送到服务器

服务器：
data = client.recv(1024)
如果是0，则等待接收客户端的信息

客户端：
question = input("Your qestion: ")
# Send the answer to the server
client_socket.sendall(question.encode())
输入内容并发送服务器

如果是选择音频输入，可以选择用whisper v3接收语音并转文字。

文字转语音可用 sovits 技术，最后生成的音频保存为当前目录下的 output.wav 文件。

src = "./output.wav"
dst = "G:/renpy_wife/girlfriend_hutao/game/audio/test.ogg"
sound = AudioSegment.from_wav(src)
sound.export(dst, format="ogg") # 音频段对象导出为 OGG 格式的文件，并保存到指定的目标路径 dst。
# send response to UI
# print(answer.encode())
client.send(answer.encode())
# finish playing audio
print(client.recv(1024).decode())


服务器将生成的音频转成 ogg 格式，并把回答文本发送给客户端；然后通过 print(client.recv(1024).decode()) 等待客户端在播放音频后返回的结束信息。

客户端播放音频：
        audio_file_path = "G:/renpy_wife/girlfriend_hutao/game/audio/test.ogg"
        os.system("start " + audio_file_path)
        client_socket.sendall("over".encode())

格瑞Lxf

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
简单的本机服务器端与客户端的通信(为后续搞语音聊天做准备)

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM )是创建了一个套接字对象，s.bind()将ip和端口绑定到创建的套接字上，使得服务器可以监听该地址和端口。客户端：client_socket.connect(server_address) # 连接服务器。服务器：client, client_addr = s.accept() # 等待客户端连接。文字转语音可用sovits技术。服务端不需要看其他函数，只需要看main函数，主要看思路。
复制链接

扫一扫