音频&深度学习Lesson11_UrbanSound8k的语谱图

该代码定义了一个名为UrbanSound8k的自定义数据集类,用于处理音频数据。它根据CSV元数据表在指定目录下定位并加载音频文件,对波形进行重采样、将多声道混合为单声道,并通过截断或补零调整到固定长度。随后使用MelSpectrogram转换器对波形进行处理,并返回Mel谱图、采样率和目标标签。
摘要由CSDN通过智能技术生成
import torch.nn.functional
import torchaudio
from torch.utils.data import Dataset
import pandas as pd # 下载慢用pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -U pandas
import os
from Code.Chapter1.plot_set import plot_waveform,plot_spectrogram
class UrbanSound8k(Dataset):
    """Dataset of fixed-length mono clips described by a metadata CSV.

    Each item is loaded from ``<root_dir>/fold<N>/<file name>``, resampled to
    ``target_sr``, mixed down to a single channel, cropped or zero-padded to
    ``target_samples`` samples and finally passed through ``transforms``.

    The CSV is addressed by column position and is assumed to follow the
    official UrbanSound8K.csv layout (slice_file_name, fsID, start, end,
    salience, fold, classID, class) -- verify if a different file is used.
    """

    # Positional column indices in the metadata CSV (see class docstring).
    _COL_FILE_NAME = 0
    _COL_FOLD = 5
    _COL_CLASS_ID = 6

    def __init__(self, root_dir, excel_dir, transforms, target_samples, target_sr) -> None:
        """Read the metadata CSV and store the preprocessing settings.

        Args:
            root_dir: Directory that contains the ``fold<N>`` sub-directories.
            excel_dir: Path of the metadata CSV file.
            transforms: Callable applied to the preprocessed waveform.
            target_samples: Number of samples every clip is cropped/padded to.
            target_sr: Sample rate every clip is resampled to.
        """
        super().__init__()
        self.root_dir = root_dir
        self.excel = pd.read_csv(excel_dir)
        self.transforms = transforms
        self.target_samples = target_samples  # clip length in samples
        self.target_sr = target_sr  # sample rate

    def reshape_if_necessary(self, waveform: torch.Tensor) -> torch.Tensor:
        """Crop or right-pad ``waveform`` with zeros to ``target_samples``.

        The length is taken from dimension 1 of ``waveform``.
        """
        num_samples = waveform.shape[1]
        if num_samples > self.target_samples:
            waveform = waveform[:, :self.target_samples]
        elif num_samples < self.target_samples:
            num_pad = self.target_samples - num_samples
            waveform = torch.nn.functional.pad(waveform, pad=(0, num_pad))
        return waveform

    def resample_if_necessary(self, waveform: torch.Tensor, sr) -> torch.Tensor:
        """Resample ``waveform`` from ``sr`` to ``target_sr`` when they differ."""
        if sr != self.target_sr:
            waveform = torchaudio.functional.resample(
                waveform, orig_freq=sr, new_freq=self.target_sr
            )
        return waveform

    def mix_if_necessary(self, waveform: torch.Tensor) -> torch.Tensor:
        """Average all channels (dimension 0) into one when there are several."""
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        return waveform

    def __getitem__(self, index):
        """Return ``(transformed_waveform, sample_rate, target)`` for a row.

        ``sample_rate`` is ``target_sr``, i.e. the rate of the data that was
        actually fed to ``transforms``. ``target`` is the value of the class-ID
        column for that row.
        """
        fold = f"fold{self.excel.iloc[index, self._COL_FOLD]}"
        audio_name = f"{self.excel.iloc[index, self._COL_FILE_NAME]}"
        # The label lives in the class-ID column, not in the fold column.
        target = self.excel.iloc[index, self._COL_CLASS_ID]
        wave_path = os.path.join(self.root_dir, fold, audio_name)

        waveform, sr = torchaudio.load(wave_path)
        waveform = self.resample_if_necessary(waveform, sr)
        waveform = self.mix_if_necessary(waveform)
        # Fix the length last: resampling changes the number of samples, so
        # cropping/padding before it would not yield ``target_samples``.
        waveform = self.reshape_if_necessary(waveform)

        waveform_mel = self.transforms(waveform)

        return waveform_mel, self.target_sr, target

    def __len__(self):
        """Return the number of rows in the metadata CSV."""
        return len(self.excel)

# Local locations of the UrbanSound8k audio folders and of the metadata CSV.
root_dir = r"C:\Users\LENOVO\PycharmProjects\pythonProject3\Code\Chapter2\UrbanSound8k\audio"
excel_dir = r"C:\Users\LENOVO\PycharmProjects\pythonProject3\Code\Chapter2\UrbanSound8k\UrbanSound8K.csv"

# Mel-spectrogram transform; its sample_rate equals the target_sr used below.
trans_mel = torchaudio.transforms.MelSpectrogram(
    sample_rate=16_000,
    n_fft=512,
)

# Build the dataset with the transform and the target length / sample rate.
mydata = UrbanSound8k(
    root_dir=root_dir,
    excel_dir=excel_dir,
    transforms=trans_mel,
    target_sr=16_000,
    target_samples=50_000,
)

# Fetch a single example and plot the first slice along dimension 0.
index = 13
waveform_mel, sr, target = mydata[index]
plot_spectrogram(waveform_mel[0, :, :])

print(target)





 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值