python 音频转文本.exe 源码实现长音频文字提取转文本文件

学习ing的码农

已于 2022-09-27 09:11:00 修改

阅读量1.6k

点赞数 1

分类专栏：pydub、音频转文本；文章标签：python、pycharm

于 2022-07-31 17:12:56 首次发布

本文链接：https://blog.csdn.net/qq_52162965/article/details/126087765

版权

pydub 同时被 2 个专栏收录

2 篇文章 2 订阅

订阅专栏

音频转文本

1 篇文章 0 订阅

订阅专栏

import tkinter as tk
from tkinter import ttk
from tkinter.filedialog import *
from tkinter.messagebox import showinfo
from aip import AipSpeech
from pydub import AudioSegment
from pydub.silence import split_on_silence
import os
import shutil
import subprocess

def choice_file():
    """Let the user pick the input file and remember the choice.

    The path returned by the open-file dialog is stored in the module-level
    ``filepath`` and mirrored into the ``filename`` Tk variable.
    """
    global filepath
    selected = askopenfilename()
    filepath = selected
    filename.set(selected)

def choice_file_type(event):
    """Record the output file type picked in the combobox.

    Bound to ``<<ComboboxSelected>>``; ``event`` is accepted because Tk passes
    it to bound handlers, but it is not used. The combobox value is stored in
    the module-level ``newfiletype`` and both the combobox value and the
    ``newtype`` variable are printed.
    """
    global newfiletype
    picked = com.get()
    newfiletype = picked
    print(picked)
    print(newtype.get())


def choice_file_folder():
    """Let the user pick the output folder and remember the choice.

    The directory returned by the folder dialog is stored in the module-level
    ``filefolderPath`` and mirrored into the ``outputpath`` Tk variable.
    """
    global filefolderPath
    chosen_dir = askdirectory()
    filefolderPath = chosen_dir
    outputpath.set(chosen_dir)

def _chunk_timestamp(offset_ms):
    """Format a millisecond offset as the ``MM:SS`` label written before a segment."""
    minutes, seconds = divmod(offset_ms // 1000, 60)
    # The output has no hour field, so minutes wrap around at 60.
    return '%02d:%02d' % (minutes % 60, seconds)


def _is_transcribable(chunk):
    """Return True for segments of 1.5 s to 10 s, the only ones that are transcribed."""
    return 1500 <= len(chunk) <= 10000


def filehandle():
    """Transcribe the selected audio file into a text file in the chosen folder.

    Reads the module-level ``filepath``, ``filefolderPath`` and ``newfiletype``
    selections plus the ``newname`` entry, then:

    1. converts the source to MP3 with ffmpeg inside a private working folder,
    2. splits the audio on silence with pydub,
    3. exports every segment of 1.5-10 s and passes it to ``mp3_characters``,
    4. appends an ``MM:SS`` label and the recognised text for each segment to
       ``<folder>/<name><type>``.

    On success the form is reset to its initial state. On failure a message
    box reports the error and the user's inputs are kept. In both cases the
    "converting" button overlay and the progress bar are removed again.
    """
    global filepath
    global filefolderPath
    global newfiletype
    # Function-scope imports keep this block self-contained.
    import tempfile
    import traceback

    if not filepath or not filefolderPath:
        showinfo(message='请先选择文件和保存位置！')
        return

    # Overlay a command-less "converting" button on the start button so the
    # action cannot be re-triggered while win.update() processes events.
    busy_button = tk.Button(win, text='转换中。。。', bd=1, bg='dodgerblue', fg='white', width=38)
    busy_button.place(x=60, y=196)
    progressbarOne = ttk.Progressbar(win, length=274)
    progressbarOne.place(x=60, y=228)
    progressbarOne['maximum'] = 100
    win.update()  # show the busy state before the long-running conversion

    newfilename = newname.get()
    audiotype = 'mp3'  # for wav/mp4/other formats see the pydub.AudioSegment API

    try:
        source_dir = os.path.dirname(os.path.abspath(filepath))
        # A uniquely named working folder next to the source holds the
        # converted MP3 and the exported segments. It is removed on exit and
        # can never collide with the user's own files or folders.
        with tempfile.TemporaryDirectory(prefix='chunks_', dir=source_dir) as workdir:
            mp3_path = os.path.join(workdir, 'source.' + audiotype)
            # Argument list instead of a shell string: paths containing
            # spaces or shell metacharacters are passed through unchanged.
            convert_cmd = ['ffmpeg', '-loglevel', 'quiet', '-y',
                           '-i', filepath, '-f', 'mp3', mp3_path]
            if subprocess.call(convert_cmd, stdin=subprocess.DEVNULL) != 0:
                raise RuntimeError('ffmpeg could not convert ' + filepath)

            print('读入音频')
            sound = AudioSegment.from_mp3(mp3_path)

            print('开始分割')
            # Split wherever at least 300 ms stay below -40 dBFS;
            # keep_silence=True keeps the silent parts in the segments.
            chunks = split_on_silence(sound, min_silence_len=300, silence_thresh=-40,
                                      keep_silence=True)

            print('开始保存')
            progress_bar_num = 0
            # Exporting and transcribing each account for half of the bar.
            progress_bar_block = 50.0 / len(chunks) if chunks else 0

            with open(filefolderPath + '/' + newfilename + newfiletype, "a") as f:
                f.write(newfilename)

                # Pass 1: export the segments that will be transcribed.
                for i, chunk in enumerate(chunks):
                    if _is_transcribable(chunk):
                        save_name = os.path.join(workdir, '%04d.%s' % (i, audiotype))
                        chunk.export(save_name, format=audiotype)
                    progress_bar_num = progress_bar_num + progress_bar_block
                    progressbarOne['value'] = progress_bar_num
                    win.update()

                # Pass 2: transcribe, labelling each segment with its offset.
                nowlen = 0  # milliseconds of audio preceding the current segment
                for i, chunk in enumerate(chunks):
                    if _is_transcribable(chunk):
                        save_name = os.path.join(workdir, '%04d.%s' % (i, audiotype))
                        f.write('\n\n' + _chunk_timestamp(nowlen) + '\n')
                        f.write(mp3_characters(save_name))
                    nowlen = nowlen + len(chunk)  # skipped segments still advance the clock
                    progress_bar_num = progress_bar_num + progress_bar_block
                    progressbarOne['value'] = progress_bar_num
                    win.update()
        print('保存完毕')
    except Exception as exc:
        # GUI callback boundary: report the failure to the user instead of
        # letting it vanish into stderr, and keep the traceback for debugging.
        traceback.print_exc()
        showinfo(message='转换失败：%s' % (exc,))
    else:
        showinfo(message='转换成功！')
        # Reset the form, including newfiletype so it matches the combobox.
        filepath = ''
        filefolderPath = ''
        newfiletype = '.doc'
        filename.set(filepath)
        outputpath.set(filefolderPath)
        newname.set("")
        newtype.set('.doc')
    finally:
        # Destroying the overlay widgets reveals the original start button
        # and avoids stacking new widgets on every run.
        progressbarOne.destroy()
        busy_button.destroy()

def mp3_characters(filepath):
    """Transcribe one audio file with Baidu speech recognition.

    The file is first converted by ffmpeg to raw 16 kHz, mono, signed 16-bit
    little-endian PCM, written next to the input with a ``.pcm`` extension,
    and the PCM bytes are then sent to ``AipSpeech.asr``.

    Args:
        filepath: Path of the audio file to transcribe.

    Returns:
        The recognised text: the strings in the response's ``result`` list
        joined together.

    Raises:
        RuntimeError: If ffmpeg fails, or if the service response carries no
            ``result`` (the response is included in the message).
    """
    pcm_path = os.path.splitext(filepath)[0] + '.pcm'

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through unchanged.
    cmd = ['ffmpeg', '-loglevel', 'quiet', '-y', '-i', filepath,
           '-acodec', 'pcm_s16le', '-f', 's16le', '-ac', '1', '-ar', '16000',
           pcm_path]
    if subprocess.call(cmd, stdin=subprocess.DEVNULL) != 0:
        raise RuntimeError('ffmpeg could not convert ' + filepath + ' to PCM')

    # Baidu Cloud credentials; these must be applied for and filled in.
    APP_ID = ''
    API_KEY = ''
    SECRET_KEY = ''

    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    with open(pcm_path, 'rb') as fp:
        au = fp.read()
    # dev_pid selects the recognition model
    # NOTE(review): 1537 is presumably Mandarin; confirm against the Baidu docs.
    res = client.asr(au, 'pcm', 16000, {'dev_pid': 1537, })

    if 'result' not in res:
        # Error responses carry no 'result'; surface the response details
        # instead of failing with a bare KeyError.
        raise RuntimeError('speech recognition failed for %s: %r' % (filepath, res))

    return "".join(res['result'])

# Create the main window and set its title.
win = tk.Tk()
win.title("语音转文本")

# Background: a canvas packed at the top of the window with the image drawn
# from its top-left corner.
# NOTE(review): keep `image_file` bound at module level; Tkinter images are
# usually expected to stay referenced while displayed.
canvas = tk.Canvas(
    win,
    bg="white",
    height=400,
    width=700,
    borderwidth=-3,
)
canvas.pack(side='top')
image_file = tk.PhotoImage(file="背景.png")
canvas.create_image(0, 0, anchor='nw', image=image_file)

# Window size and position, fixed (non-resizable) layout, and program icon.
win.geometry("400x300+400+100")
win.resizable(False, False)
win.iconbitmap("程序图标.ico")

# Tk variables backing the entry boxes and the file-type combobox.
filename, outputpath, newname, newtype = (tk.StringVar() for _ in range(4))

# Module-level state shared with the callbacks.
filepath = ''            # file to process
filefolderPath = ''      # folder the new file is saved to
newfiletype = '.doc'     # extension of the new file
newfilename = ''         # name of the new file
progress_bar_num = 0     # progress bar position

# Row "choose file": label, entry and the button that opens the file dialog.
tk.Label(win, text='选择文件', bg='white').place(x=60, y=65)
tk.Entry(win, textvariable=filename, bg='ghostwhite').place(
    height=26, x=115, y=65)
tk.Button(
    win, text='打开文件', command=choice_file,
    bd=1, bg='white', padx=7, pady=0,
).place(x=265, y=65)

# Row "new file name": label, entry and the drop-down for the file type.
tk.Label(win, text='新文件名', bg='white').place(x=60, y=150)
tk.Entry(win, textvariable=newname, bg='ghostwhite').place(
    height=26, x=115, y=150)
com = ttk.Combobox(win, textvariable=newtype)
com.place(height=26, width=70, x=265, y=150)
com["value"] = (".doc", ".docx", ".txt")  # selectable file types
com.current(0)  # default to the first entry, ".doc"
com.bind("<<ComboboxSelected>>", choice_file_type)

# Row "save location": label, entry and the button that opens the folder dialog.
tk.Label(win, text='保存位置', bg='white').place(x=60, y=110)
tk.Entry(win, textvariable=outputpath, bg='ghostwhite').place(
    height=26, x=115, y=110)
tk.Button(
    win, text='选择文件夹', command=choice_file_folder,
    bd=1, bg='white', pady=0,
).place(x=265, y=110)

# Row "convert": the button that starts the transcription.
tk.Button(
    win, text='开始转换', command=filehandle,
    bd=1, bg='dodgerblue', fg='white', width=38,
).place(x=60, y=196)

win.mainloop()