语音处理-VC(voice conversion) 语音转换_API实现

最新推荐文章于 2024-05-09 09:35:32 发布

正经的曹同学

最新推荐文章于 2024-05-09 09:35:32 发布

阅读量1.8k

点赞数

分类专栏：语音　文章标签：python

本文链接：https://blog.csdn.net/crh170/article/details/121031992

版权

语音专栏收录该内容

7 篇文章 0 订阅

订阅专栏

本文代码基于标贝开放的接口资源，实现语音转换（VC，voice conversion）。

简单介绍：

我需要一批VC（语音转换）数据，于是通过调用现成的接口来免费获取（白嫖）数据，这便是本文的由来。代码的主体结构仍然沿用官方API示例，我在此基础上修改了相关信息，并增加了文件的批量化处理、参数化配置等功能。

有问题欢迎来讨论

# -*- coding: utf-8 -*-

import argparse
import json
import wave
from scipy.io import wavfile
import sys
import requests
import websocket
from pathlib import Path
import fnmatch
import os
import time
'''
* 发音人列表
     *  邻家女声_静静 Vc_jingjing
     *  优雅女声_娇娇 Vc_jiaojiao
     *  绅士男生_天天 Vc_tiantian
     *  恐龙贝克_童声 Vc_baklong
     *  可爱女声_未眠 Vc_weimian
'''
class Client:
    """WebSocket client that uploads audio packets and saves the converted audio.

    Attributes:
        data: iterable of binary packets to send once the connection opens.
        uri: WebSocket endpoint to connect to.
        converted_data: audio bytes accumulated from the responses.
        save_path: path of the WAV file written when the last packet arrives.
    """

    def __init__(self, data, uri, save_path):
        self.data = data
        self.uri = uri
        self.converted_data = b""
        self.save_path = save_path

    def connect(self):
        """Open the connection and block until the socket is closed."""
        # Use the instance's own endpoint rather than a module-level global.
        ws_app = websocket.WebSocketApp(self.uri,
                                        on_open=self.on_open,
                                        on_message=self.on_message,
                                        on_error=self.on_error,
                                        on_close=self.on_close)
        ws_app.run_forever()

    def on_open(self, ws):
        """Send every prepared packet as a binary frame once connected."""
        print("sending..")
        for message in self.data:
            ws.send(message, websocket.ABNF.OPCODE_BINARY)

    def on_message(self, ws, message):
        """Handle one response frame.

        A binary frame is parsed as: 4-byte big-endian header length, a JSON
        header of that length, then audio bytes. Audio is accumulated, and
        when the header's ``lastpkg`` flag is true the collected audio is
        written to ``save_path`` as 16 kHz / 16-bit / mono WAV and the socket
        is closed.
        """
        if isinstance(message, str):
            # NOTE(review): presumably the service can answer with a plain
            # JSON text frame (e.g. on failure) -- confirm against the API.
            # Such a frame has no binary header, so just report it.
            print(message)
            return

        length = int.from_bytes(message[:4], byteorder='big', signed=False)
        header = json.loads(message[4: length + 4].decode())
        # Only the JSON header is printable; the rest of the frame is audio.
        print(str(header))

        code = header.get("errcode")
        # NOTE(review): a missing errcode is treated as "no error" here --
        # confirm against the API.
        if code not in (None, 0):
            print("error: ", str(header))
            # Stop waiting for a final packet that will not arrive.
            ws.close()
            return

        self.converted_data += message[4 + length:]
        if header.get('lastpkg'):
            with wave.open(self.save_path, 'wb') as wav_out:
                wav_out.setparams((1, 2, 16000, 0, 'NONE', 'NONE'))
                wav_out.writeframes(self.converted_data)
            # Close only after the file has been flushed and closed.
            ws.close()
            print("task finished successfully")

    def on_error(self, ws, error):
        """Print an error reported by the WebSocket layer."""
        print("error: ", str(error))

    def on_close(self, ws, close_status_code=None, close_msg=None):
        """Report that the connection closed.

        The status and message parameters are optional so the callback works
        whether or not the WebSocket library passes them.
        """
        print("client closed.")


# 准备数据
def prepare_data(args, access_token, chunk_size=32000):
    """Split the input file into framed packets ready to be sent.

    Each packet is: a 4-byte big-endian length of the JSON header, the
    UTF-8 encoded JSON header, then up to ``chunk_size`` bytes of the file.
    The header's ``lastpkg`` flag is true only on the final packet.

    Args:
        args: object exposing ``file_path`` (file to read) and ``voice_name``.
        access_token: token placed in every packet header.
        chunk_size: number of file bytes carried per packet.

    Returns:
        A list of ``bytes`` packets; empty when the file is empty.
    """
    voice_name = args.voice_name
    with open(args.file_path, 'rb') as f:
        audio = f.read()

    total = len(audio)
    packets = []
    for start in range(0, total, chunk_size):
        end = start + chunk_size
        header = {
            "access_token": access_token,
            "voice_name": voice_name,
            'enable_vad': True,
            'align_input': True,
            # ">=" (not ">") so a file whose size is an exact multiple of
            # chunk_size still gets its final chunk flagged as the last one.
            "lastpkg": end >= total,
        }
        header_bytes = json.dumps(header).encode()
        # The prefix must count encoded bytes, which is what the receiver
        # slices on, not characters of the un-encoded string.
        prefix = len(header_bytes).to_bytes(4, byteorder='big')
        packets.append(prefix + header_bytes + audio[start:end])

    return packets


# 获取命令行输入参数
def get_args(file_path, file_save_path, id, secert):
    """Build the argument namespace, using the given values as defaults.

    The process command line is still parsed, so any option supplied there
    overrides the corresponding default passed in here.

    Args:
        file_path: default for ``-file_path`` (input audio file).
        file_save_path: default for ``-file_save_path`` (output file).
        id: default for ``-client_id``. The name shadows the builtin but is
            kept because callers pass it by keyword.
        secert: default for ``-client_secret`` (spelling kept for the same
            reason).

    Returns:
        The parsed ``argparse.Namespace`` with ``client_secret``,
        ``client_id``, ``file_path``, ``file_save_path`` and ``voice_name``.
    """
    parser = argparse.ArgumentParser(description='Voice conversion (VC) client')
    parser.add_argument('-client_secret', type=str, default=secert)
    parser.add_argument('-client_id', type=str, default=id)
    parser.add_argument('-file_path', type=str, default=file_path)
    parser.add_argument('-file_save_path', type=str, default=file_save_path)
    parser.add_argument('--voice_name', type=str, default='Vc_tiantian')
    return parser.parse_args()


# 获取access_token用于鉴权
def get_access_token(client_secret, client_id, timeout=30):
    """Request an access token using the client-credentials grant.

    Args:
        client_secret: secret sent as the ``client_secret`` query parameter.
        client_id: id sent as the ``client_id`` query parameter.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        The ``access_token`` field of the JSON response (``None`` when the
        response does not contain one).

    Raises:
        Exception: when the request fails or returns an HTTP error status;
            the underlying ``requests`` error is chained as the cause.
    """
    url = "https://openapi.data-baker.com/oauth/2.0/token"
    # Passing the values through ``params`` lets requests URL-encode them,
    # so credentials containing reserved characters are sent intact.
    params = {
        "grant_type": "client_credentials",
        "client_secret": client_secret,
        "client_id": client_id,
    }
    try:
        response = requests.post(url, params=params, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as err:
        # An HTTP error carries the server's response; a connection-level
        # failure has none.
        failed = getattr(err, "response", None)
        if failed is not None:
            print(failed.text)
        raise Exception("failed to obtain access token: {}".format(err)) from err

    return json.loads(response.text).get('access_token')

def find_all_files(files_path):
    """Recursively collect the ``.wav`` files under a directory.

    Files whose name starts with ``._`` are skipped.

    Args:
        files_path: root directory to search.

    Returns:
        A list of matching file paths as strings.
    """
    # ``Path.name`` yields the final path component on every platform;
    # splitting on a backslash only isolates the file name on Windows.
    return [
        str(wav_path)
        for wav_path in Path(files_path).rglob('*.wav')
        if not wav_path.name.startswith('._')
    ]

def _target_path(source_file, source_root, target_root):
    """Map a file under source_root to the same relative path under target_root."""
    relative = os.path.relpath(source_file, source_root)
    return os.path.normpath(os.path.join(target_root, relative))


def _has_content(path):
    """Return True when path exists and is not empty."""
    return os.path.exists(path) and os.path.getsize(path) > 0


if __name__ == '__main__':
    # Batch conversion: every .wav under source_root is converted and saved
    # to the same relative location under target_root. A source file is
    # removed only after its conversion produced output, so a re-run picks
    # up whatever is still pending.
    source_root = 'G:/1k'
    target_root = 'G:/VC'
    # Fill in your own application id and secret.
    client_id = 'appid自己填'
    secret = 'appserect自己填'
    uri = "wss://openapi.data-baker.com/ws/voice_conversion"

    try:
        file_names = find_all_files(source_root)
        # One token is fetched up front and reused for every file.
        access_token = get_access_token(secret, client_id)
    except Exception as e:
        print(e)
        file_names = []
        access_token = None

    count = 0
    for source_file_name in file_names:
        # Derive the destination from the path relative to source_root, so
        # a "1k" occurring inside a file name is left untouched.
        file_save_name = _target_path(source_file_name, source_root, target_root)
        print(file_save_name)

        # A failure on one file is reported and the batch moves on.
        try:
            args = get_args(source_file_name, file_save_name,
                            id=client_id, secert=secret)
            save_dir = os.path.dirname(args.file_save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            # Reset the output so that a non-empty file afterwards can only
            # have been written by this run.
            with open(args.file_save_path, 'wb'):
                pass

            data = prepare_data(args, access_token)
            client = Client(data, uri, args.file_save_path)
            client.connect()
        except Exception as e:
            print(e)
            continue

        # Progress report for the file just processed.
        count += 1
        print('当前文件'+source_file_name+'当前次数'+str(count))

        if not _has_content(args.file_save_path):
            # Keep the source: nothing was written, so the conversion failed.
            print('conversion produced no output, source kept:%s' % source_file_name)
            continue

        if os.path.exists(source_file_name):
            os.remove(source_file_name)
            print('文件已经删除了')
        else:
            print('no such file:%s' % source_file_name)

欢迎交流

正经的曹同学

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
语音处理-VC(voice conversion) 语音转换_API实现

代码为标贝开源的资源来实现语音的转换VC# -*- coding: utf-8import argparseimport jsonimport wavefrom scipy.io import wavfileimport sysimport requestsimport websocketfrom pathlib import Pathimport fnmatchimport osimport time'''* 发音人列表 * 邻家女声_静静 Vc_jingj
复制链接

扫一扫

专栏目录