深度学习笔记(7)——多进程实战之生成数据并保存到CSV
1.单个数据处理函数Generate_Data()
Generate_Data用于处理单组参数:生成由三个正弦波叠加而成的信号,以及在其上叠加噪声后的信号。
2.解决plt无法在多进程中使用的问题
# Select the non-interactive Agg backend so figures are rendered straight to
# files and no GUI is needed in the workers.
import matplotlib
matplotlib.use('Agg')
3.设置核心数以及分配进程
# Number of workers the pool will start.
num_worker = 12
# Create the worker pool
pool = Pool(num_worker) # num_worker is meant to equal the number of CPU cores on this machine
4. 构造待批处理的列表参数(Af为不同处理文件参数)
from itertools import product

# Amplitudes 0.2 .. 1.0 and frequencies 20 .. 100 Hz for the sine components.
# round() removes float noise such as 3 * 0.2 == 0.6000000000000001.
A = [round((i + 1) * 0.2, 2) for i in range(5)]
f = [(i + 1) * 2 * 10 for i in range(5)]

# One entry per task: [A1, A2, A3, f1, f2, f3]. product() walks the six axes in
# the same order as six nested loops (rightmost varies fastest), giving
# 5 ** 6 = 15625 parameter sets.
Af = [list(combo) for combo in product(A, A, A, f, f, f)]
5.在进程里处理数据
# Call Generate_Data once for every parameter set in Af, spread over the pool's workers.
pool.map(Generate_Data, Af)
import csv
import os
from functools import partial
from glob import glob
from itertools import product
from multiprocessing.dummy import Pool
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.figure import Figure
# Root directory of the generated dataset. The trailing slash is required
# because every path below is built by plain string concatenation.
ROOT = 'F:/dataset/'
# Image directory relative to ROOT; also the prefix of every name stored in the CSV.
image_root = 'signalset/image/'
root = ROOT + image_root

# Description templates. The placeholders come in (frequency, amplitude) pairs,
# one pair per sine component.
templete_pure = 'Please help me generate a noise-free signal composed of three sine waves, where the first sine wave has a frequency of {} Hz and an amplitude of {}, the second sine wave has a frequency of {} Hz and an amplitude of {}, and the third sine wave has a frequency of {} Hz and an amplitude of {}.'
templete_noise = 'Please help me generate a signal with noise, composed of three sine waves, where the first sine wave has a frequency of {} Hz and an amplitude of {}, the second sine wave has a frequency of {} Hz and an amplitude of {}, and the third sine wave has a frequency of {} Hz and an amplitude of {}.'


def build_parameter_grid():
    """Return every parameter set to render.

    Returns:
        A list of 5 ** 6 = 15625 lists ``[A1, A2, A3, f1, f2, f3]``, ordered as
        six nested loops would produce them (``f3`` varies fastest).
    """
    # round() removes float noise such as 3 * 0.2 == 0.6000000000000001, which
    # would otherwise end up in file names and descriptions.
    amplitudes = [round((i + 1) * 0.2, 2) for i in range(5)]
    frequencies = [(i + 1) * 2 * 10 for i in range(5)]
    return [
        list(combo)
        for combo in product(amplitudes, amplitudes, amplitudes,
                             frequencies, frequencies, frequencies)
    ]


def synthesize_signal(data):
    """Build the clean and the noisy waveform for one parameter set.

    Args:
        data: ``[A1, A2, A3, f1, f2, f3]`` - three amplitudes followed by the
            three matching frequencies in Hz.

    Returns:
        ``(t, clean, noisy)``: the time axis (one second sampled at 1350 Hz),
        the sum of the three sine components, and that sum plus Gaussian noise.
    """
    fs = 1350
    # Integer arange scaled afterwards: a float step (np.arange(0, 1, 1 / fs))
    # can yield fs or fs + 1 samples depending on rounding.
    t = np.arange(fs) / fs

    clean = np.zeros_like(t)
    for amplitude, frequency in zip(data[:3], data[3:6]):
        clean += amplitude * np.sin(2 * np.pi * frequency * t)

    # Two independent zero-mean Gaussian noise sources with variance 0.03 and
    # 0.05; their sum is additive white noise with variance 0.08.
    noise = (np.sqrt(0.03) * np.random.randn(fs)
             + np.sqrt(0.05) * np.random.randn(fs))
    return t, clean, clean + noise


def describe_signal(template, data):
    """Fill a description template for one parameter set.

    The templates read "frequency of {} Hz and an amplitude of {}" for each
    component, so values are passed as (f1, A1, f2, A2, f3, A3).

    Args:
        template: ``templete_pure`` or ``templete_noise``.
        data: ``[A1, A2, A3, f1, f2, f3]``.

    Returns:
        The formatted description string.
    """
    return template.format(data[3], data[0], data[4], data[1], data[5], data[2])


def save_plot(t, y, path):
    """Render ``y`` against ``t`` without axes and save it to ``path``.

    A standalone ``Figure`` is used instead of ``plt.figure()``/``plt.plot()``:
    pyplot tracks one global "current figure", so concurrent workers calling
    pyplot can draw on each other's figures. A standalone figure is never
    registered with pyplot, so it needs no ``plt.close()`` either.
    """
    fig = Figure()
    ax = fig.add_subplot(111)
    ax.plot(t, y)
    ax.axis('off')
    fig.savefig(path)


def Generate_Data(data):
    """Render the clean and noisy plots for one parameter set.

    Writes ``<stem>-pure.jpg`` and ``<stem>-noise.jpg`` under ``ROOT``, where
    ``<stem>`` is ``image_root`` + ``f<f1>-<f2>-<f3>A<A1>-<A2>-<A3>``.

    Args:
        data: ``[A1, A2, A3, f1, f2, f3]``.

    Returns:
        Two ``[filename, description]`` rows (clean first, then noisy) for the
        metadata CSV. Rows are returned rather than written here so that only
        the caller touches the CSV file.
    """
    t, clean, noisy = synthesize_signal(data)

    stem = (image_root
            + 'f' + '-'.join(str(value) for value in data[3:6])
            + 'A' + '-'.join(str(value) for value in data[:3]))
    pure_name = stem + '-pure' + '.jpg'
    noise_name = stem + '-noise' + '.jpg'

    save_plot(t, clean, ROOT + pure_name)
    save_plot(t, noisy, ROOT + noise_name)

    return [
        [pure_name, describe_signal(templete_pure, data)],
        [noise_name, describe_signal(templete_noise, data)],
    ]


if __name__ == '__main__':
    # Non-interactive backend: nothing in this script needs a display.
    matplotlib.use('Agg')
    # savefig() does not create missing directories.
    os.makedirs(root, exist_ok=True)

    Af = build_parameter_grid()
    print(len(Af))

    num_worker = 12
    # NOTE: Pool is multiprocessing.dummy.Pool, i.e. a pool of threads.
    # newline='' is what the csv module requires; without it Windows output
    # gets a blank line after every row.
    with open(ROOT + 'signalset/metadata.csv', mode='w', newline='',
              encoding='utf-8') as csv_file, Pool(num_worker) as pool:
        witter = csv.writer(csv_file, delimiter=',')
        witter.writerow(['Filename', 'Description'])
        # imap yields results in task order as they complete, so rows are
        # written incrementally and only by this thread.
        for rows in pool.imap(Generate_Data, Af):
            witter.writerows(rows)