腾讯云语音识别总结:cos存储到文字获取

该博客介绍了如何利用腾讯云的对象存储COS来存储尽职调查录音,并通过腾讯云ASR服务将录音转换成文字。首先,代码展示了如何上传文件到COS,并设置权限为公开读取。接着,利用腾讯云的API实现录音转文字,涉及了说话人分离功能。最后,演示了如何定期检查任务状态,直到转文字完成并保存结果。
摘要由CSDN通过智能技术生成
需求背景:将尽职调查(尽调)的录音整理成文字。
设计:先将录音上传到腾讯云 COS 对象存储,再调用腾讯云 ASR 的录音文件识别接口把录音转成文字。

(为啥选腾讯云?当然是因为腾讯云最便宜……每月免费 10 小时录音转文字,外加 40 GB 永久免费的存储。)

代码开发:(直接照搬腾讯云 API 自带的代码生成示例)

# -*- coding=utf-8
# The appid has been removed from the configuration; include it in the Bucket
# argument instead. A Bucket name has the form BucketName-APPID.
# 1. Set up the user configuration: secretId, secretKey and Region.
from qcloud_cos import CosConfig
from qcloud_cos import CosS3Client
import sys
import logging
# Send log records of level INFO and above to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
secret_id = ''  # replace with the user's secretId
secret_key = ''  # replace with the user's secretKey

def send_file(path, file, secret_id, secret_key, bucket='cos-bucket',
              region='ap-shanghai', key_prefix='/尽职调查/',
              url_prefix='cos的存储地址'):
    """Upload a local file to Tencent Cloud COS and mark it public-read.

    Args:
        path: Directory prefix of the local file. It is concatenated with
            ``file`` as-is, so it must end with a path separator.
        file: File name; also used as the last component of the object key.
        secret_id: Tencent Cloud API secretId.
        secret_key: Tencent Cloud API secretKey.
        bucket: Target bucket (per the SDK sample, of the form
            BucketName-APPID).
        region: COS region of the bucket.
        key_prefix: Prefix prepended to ``file`` to form the object key.
        url_prefix: Prefix prepended to ``file`` to form the returned URL.
            The default is a placeholder and must be replaced with the
            bucket's real base address.

    Returns:
        ``url_prefix + file``. The file name is not URL-encoded here.
    """
    token = None  # only needed when using temporary credentials
    scheme = 'https'  # protocol used to reach COS (http or https)
    config = CosConfig(Region=region, SecretId=secret_id, SecretKey=secret_key, Token=token, Scheme=scheme)
    # 2. Create the client object.
    # See the SDK demo for details:
    # https://github.com/tencentyun/cos-python-sdk-v5/blob/master/qcloud_cos/demo.py
    client = CosS3Client(config)

    # Build the key once so the upload and the ACL call cannot drift apart.
    key = key_prefix + file
    client.upload_file(
        Bucket=bucket,
        Key=key,
        LocalFilePath=path + file,
        PartSize=10,
        MAXThread=5,
    )

    # NOTE: 'public-read' makes the uploaded object readable by anyone who
    # has its URL.
    client.put_object_acl(
        Bucket=bucket,
        Key=key,
        ACL='public-read',
    )

    return url_prefix + file

if __name__ == '__main__':
    # send_file takes the credentials as its third and fourth positional
    # arguments; they are required, so pass the module-level placeholders.
    send_file(r'/尽调/', '.m4a', secret_id, secret_key)
    print('OK')
import json
import base64
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.asr.v20190614 import asr_client, models
import urllib.parse

def send(data,secret_id,secret_key,style=0):
    """Submit a recording to Tencent Cloud ASR and return the task id.

    Args:
        data: When ``style == 0``, a bytes object that is decoded as UTF-8
            and sent in the ``Data`` field (presumably base64-encoded audio;
            confirm against the caller). Otherwise, the URL of the audio
            file, sent in the ``Url`` field.
        secret_id: Tencent Cloud API secretId.
        secret_key: Tencent Cloud API secretKey.
        style: ``0`` sends the audio inline; any other value sends a URL.

    Returns:
        The ``TaskId`` from the CreateRecTask response, or ``None`` when the
        SDK raised ``TencentCloudSDKException`` (the error is printed).
    """
    try:
        cred = credential.Credential(secret_id, secret_key)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "asr.tencentcloudapi.com"
        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = asr_client.AsrClient(cred, "", clientProfile)
        req = models.CreateRecTaskRequest()

        # Settings shared by both submission modes.
        params = {
            "EngineModelType": "16k_zh",
            "ChannelNum": 1,
            "SpeakerDiarization": 1,  # enable speaker diarization
            # Number of speakers (only meaningful with diarization enabled).
            # Range 0-10; 0 means automatic (currently only <= 6 speakers
            # are supported).
            "SpeakerNumber": 0,
            "ResTextFormat": 2,
            "FilterModal": 1,
        }
        if style == 0:
            # Inline audio carried in the request body.
            params["SourceType"] = 1
            params["Data"] = str(data, 'utf-8')
        else:
            # Audio fetched by the service from a URL.
            params["SourceType"] = 0
            params["Url"] = data

        req.from_json_string(json.dumps(params))
        resp = client.CreateRecTask(req)

        # Parse the response as JSON. eval() would execute arbitrary
        # expressions and fails on the JSON literals null/true/false.
        user_dict = json.loads(resp.to_json_string())
        return user_dict['Data']['TaskId']

    except TencentCloudSDKException as err:
        print(err)
        return None


if __name__ == '__main__':
    # Audio file name; its last four characters are dropped when the task
    # is recorded below.
    audio_name = '.m4a'
    audio_url = ''
    # A non-zero style makes send() submit the audio by URL.
    task_id = send(audio_url, secret_id, secret_key, style=2)
    # Append one "<task id>:<name without last four chars>" line per task.
    record = '{}:{}\n'.format(str(task_id), audio_name[:-4])
    with open('taskid.txt', mode='a', encoding='utf8') as log:
        log.write(record)
    print(task_id)
import json
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.asr.v20190614 import asr_client, models
import time


secret_id = ''  # replace with the user's secretId
secret_key = ''  # replace with the user's secretKey

def get(id,text,secret_id,secret_key,poll_interval=10):
    """Poll an ASR task until it finishes and append its transcript to a file.

    Args:
        id: TaskId of the recognition task to query. (The name shadows the
            builtin but is kept so existing callers keep working.)
        text: Path of the output file; the result is appended as UTF-8.
        secret_id: Tencent Cloud API secretId.
        secret_key: Tencent Cloud API secretKey.
        poll_interval: Seconds to sleep between status checks.

    Returns:
        ``0`` once the task reports ``success`` (transcript written) or
        ``failed`` (nothing written), or ``None`` when the SDK raised
        ``TencentCloudSDKException`` (the error is printed).
    """
    try:
        # The client and request do not change between polls, so build
        # them once instead of on every iteration.
        cred = credential.Credential(secret_id, secret_key)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "asr.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = asr_client.AsrClient(cred, "", clientProfile)

        req = models.DescribeTaskStatusRequest()
        req.from_json_string(json.dumps({"TaskId": id}))

        while True:
            resp = client.DescribeTaskStatus(req)
            task = json.loads(resp.to_json_string())['Data']
            status = task['StatusStr']

            if status == 'success':
                with open(text, 'a', encoding='utf8') as f:
                    f.write(task['Result'])
                print('完成success')
                break
            if status == 'failed':
                print('数据异常')
                break

            # Any other status: the task is still pending, so wait and retry.
            print("doing")
            time.sleep(poll_interval)

        return 0
    except TencentCloudSDKException as err:
        print(err)
        return None
if __name__ == '__main__':
    import sys

    # The task id (the TaskId returned when the recognition task was
    # created) is supplied on the command line.
    if len(sys.argv) != 2:
        sys.exit('usage: {} <task_id>'.format(sys.argv[0]))
    text = '.txt'
    path = r'/尽调/'+text
    get(int(sys.argv[1]),path,secret_id,secret_key)
from get_tx import get
from send_tx import send
from send_file import send_file
import urllib.parse

secret_id = ''  # replace with the user's secretId
secret_key = ''  # replace with the user's secretKey

# End-to-end flow: upload the recording to COS, submit it to ASR by URL,
# record the task id, then poll until the transcript has been written.
filename = ''+'.aac'
# NOTE(review): this prefix mixes '\' and '/'; confirm it is the intended
# local directory.
path = r'\尽调/'
send_file(path, filename,secret_id,secret_key)
print('OK')

# File name without its final extension. It is used for both the task log
# entry and the transcript file name so the two always agree.
stem = filename.rsplit('.', 1)[0]

# 'cosurl地址' is a placeholder for the bucket's base URL; the file name is
# percent-encoded because it may contain non-ASCII characters.
# Example shape: 'https://.cos.ap-shanghai.myqcloud.com/%E5%B0%BD%E8%81%8C%.m4a'
http = 'cosurl地址' + urllib.parse.quote(filename)
task_id = send(http,secret_id,secret_key, style=2)
if task_id is None:
    # send() prints the SDK error and returns None; stop here rather than
    # logging a bogus entry and failing later in int(None).
    raise SystemExit('ASR task was not created; see the error printed above')

with open('taskid.txt', 'a', encoding='utf8') as f:
    f.write(str(task_id) + ':' + stem + '\n')
print(task_id)

wpath = path + stem + '.txt'
get(int(task_id), wpath,secret_id,secret_key)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值