读取RML2018.01A数据集

Chai.yx

已于 2024-02-22 09:12:34 修改

阅读量1.8k

点赞数 15

文章标签： python 深度学习

于 2024-01-05 10:19:28 首次发布

本文链接：https://blog.csdn.net/qq_63986791/article/details/135402535

版权

2024.1.11勘误

数据集下载地址：Datasets – DeepSig（https://www.deepsig.ai/datasets）

下载完成后解压得到GOLD_XYZ_OSC.0001_1024.hdf5

目的：读取RML2018.01A数据集用于深度学习训练

代码将数据集读取为tensor数据类型，并划分为训练集数据，训练集标签，测试集数据，测试集标签。

修改路径：需要将代码7，8行路径修改为自己的.hdf5数据存放路径。

可以选择是否截取信噪比——select_SNR=True为只保留信噪比大于等于2 dB的数据，select_SNR=False为使用所有信噪比的数据。可以修改第25行的（if Z >= 2: ）中的阈值，自行选择截取多大信噪比的数据。

选择是否将时域数据转换为频域数据——fft=True为使用频域，fft=False为使用时域。

import numpy as np
import torch
import h5py


def _power_spectrum_inplace(samples, chunk_size=4096):
    """Overwrite each channel of *samples* with its centred power spectrum.

    ``samples`` is indexed as ``[example, time, channel]``.  For every example
    and channel the time series is replaced by ``|FFT|**2`` with the bins
    reordered to ascending frequency (what sorting by ``np.fft.fftfreq``
    produces, i.e. ``np.fft.fftshift``).  The result is cast back to the dtype
    of ``samples`` on assignment.

    Work is done ``chunk_size`` examples at a time so the complex intermediate
    never has to exist for the whole array at once.
    """
    for start in range(0, samples.shape[0], chunk_size):
        chunk = samples[start:start + chunk_size]  # basic slice -> view
        power = np.abs(np.fft.fft(chunk, axis=1)) ** 2
        chunk[...] = np.fft.fftshift(power, axes=1)


def load_data_2018(select_SNR=True, fft=False,
                   path=r"E:/2018.01/GOLD_XYZ_OSC.0001_1024.hdf5",
                   min_snr=2):
    """Load the RML2018.01A HDF5 file and split it 80/20 into train/test tensors.

    Args:
        select_SNR: If True, keep only examples whose SNR (dataset ``Z``) is
            greater than or equal to ``min_snr``; if False, keep everything.
        fft: If True, replace each of the two channels with its power
            spectrum (frequency domain); if False, return the time-domain
            samples unchanged.
        path: Location of ``GOLD_XYZ_OSC.0001_1024.hdf5``.
        min_snr: SNR threshold used when ``select_SNR`` is True.

    Returns:
        ``(X_train, Y_train, X_test, Y_test)`` as torch tensors.  ``X_*`` keep
        the per-example layout of dataset ``X`` (``[n, 1024, 2]`` according to
        the original author's notes); ``Y_*`` are 1-D class indices obtained
        by ``argmax`` over the rows of dataset ``Y``.  The train part holds
        ``int(0.8 * n)`` randomly chosen examples, the test part the rest.

    Note:
        The split uses NumPy's global random state; call ``np.random.seed``
        beforehand for a reproducible split.
    """
    # Read-only access is sufficient, and the context manager guarantees the
    # file handle is released even if loading fails part-way.
    with h5py.File(path, 'r') as h5file:
        # ``[:]`` already materialises NumPy arrays; no extra copy is needed.
        X_array = h5file['X'][:]  # IQ samples
        Y_array = h5file['Y'][:]  # one-hot modulation label
        Z_array = h5file['Z'][:]  # SNR

    # Class order according to the original author's notes (TODO confirm
    # against the dataset's own class listing):
    # ['OOK', '4ASK', '8ASK', 'BPSK', 'QPSK', '8PSK', '16PSK', '32PSK',
    #  '16APSK', '32APSK', '64APSK', '128APSK', '16QAM', '32QAM', '64QAM',
    #  '128QAM', '256QAM', 'AM-SSB-WC', 'AM-SSB-SC', 'AM-DSB-WC', 'AM-DSB-SC',
    #  'FM', 'GMSK', 'OQPSK']

    if select_SNR:
        # Vectorised replacement for a per-row Python loop over Z.
        keep = np.flatnonzero(Z_array.ravel() >= min_snr)
        X_array = X_array[keep]
        Y_array = Y_array[keep]

    # Random 8:2 split: draw the training indices, the remainder is the test set.
    n_examples = X_array.shape[0]
    n_train = int(n_examples * 0.8)
    train_idx = np.random.choice(n_examples, size=n_train, replace=False)
    in_test = np.ones(n_examples, dtype=bool)
    in_test[train_idx] = False
    test_idx = np.flatnonzero(in_test)

    # Fancy indexing copies, so the in-place FFT below never touches X_array.
    X_train = X_array[train_idx]
    X_test = X_array[test_idx]

    if fft:
        _power_spectrum_inplace(X_train)
        _power_spectrum_inplace(X_test)

    X_train = torch.from_numpy(X_train)
    X_test = torch.from_numpy(X_test)

    # One-hot rows -> integer class index.
    Y_train = torch.argmax(torch.from_numpy(Y_array[train_idx]), dim=1)
    Y_test = torch.argmax(torch.from_numpy(Y_array[test_idx]), dim=1)

    return X_train, Y_train, X_test, Y_test