Can't afford a GPU…
Imports
import torch.multiprocessing as mp
import torch
import torch.nn as nn
import torch.utils.data as Data
Here we use torch.multiprocessing to build the worker processes; its API closely mirrors Python's built-in multiprocessing module.
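As a quick aside (a minimal sketch of mine, not from the original post; echo and q are just illustrative names), torch.multiprocessing can be used as a drop-in replacement for the standard multiprocessing module. The one PyTorch-specific behavior relied on below is that tensors sent through its Queue are placed in shared memory instead of being copied byte-by-byte:

import torch
import torch.multiprocessing as mp

def echo(q):
    t = q.get()
    print(t, t.is_shared())  # the received tensor is backed by shared memory

if __name__ == '__main__':
    q = mp.Queue()
    p = mp.Process(target=echo, args=(q,))  # same Process API as the standard library
    p.start()
    q.put(torch.ones(3))
    p.join()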
Prepare the training data (same as single-process)
Random numbers are used to generate the training data here, so there is nothing to download.
def get_train_data():
    """Build the training data. Random numbers are used here, which is why the final results will not be good."""
    def get_tensor_from_pd(dataframe_series) -> torch.Tensor:
        return torch.tensor(data=dataframe_series.values)

    import numpy as np
    import pandas as pd
    from sklearn import preprocessing
    # Generate random x, min-max normalize it, wrap it in a DataFrame, then convert it to a tensor
    df = pd.DataFrame(data=preprocessing.MinMaxScaler().fit_transform(np.random.randint(0, 10, size=(2000, 300))))
    y = pd.Series(list(range(2000)))
    return get_tensor_from_pd(df).float(), get_tensor_from_pd(y).float()
Build the model (same as single-process)
An LSTM autoencoder is used here, but you can swap in any model you want to train; the multiprocessing setup does not depend on what the model is.
class LstmFcAutoEncoder(nn.Module):
    def __init__(self, input_layer=300, hidden_layer=100, batch_size=20):
        super(LstmFcAutoEncoder, self).__init__()
        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.batch_size = batch_size
        self.encoder_lstm = nn.LSTM(self.input_layer, self.hidden_layer, batch_first=True)
        self.encoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.decoder_lstm = nn.LSTM(self.hidden_layer, self.input_layer, batch_first=True)
        self.decoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.relu = nn.ReLU()

    def forward(self, input_x):
        input_x = input_x.view(len(input_x), 1, -1)
        # encoder
        encoder_lstm, (n, c) = self.encoder_lstm(input_x,
                                                 # hidden/cell state shape: (n_layers, batch, hidden_size)
                                                 (torch.zeros(1, self.batch_size, self.hidden_layer),
                                                  torch.zeros(1, self.batch_size, self.hidden_layer)))
        encoder_fc = self.encoder_fc(encoder_lstm)
        encoder_out = self.relu(encoder_fc)
        # decoder
        decoder_fc = self.relu(self.decoder_fc(encoder_out))
        decoder_lstm, (n, c) = self.decoder_lstm(decoder_fc,
                                                 (torch.zeros(1, self.batch_size, self.input_layer),
                                                  torch.zeros(1, self.batch_size, self.input_layer)))
        return decoder_lstm.squeeze()
Write the training loop (same as single-process)
Because the processes are isolated from one another, only the model parameters are shared; everything else is per-process: each worker has its own copy of the loss function, its own optimizer state, and its own epoch counter (see the note after the train() function below).
def train(model, data_loader, loss_function, optimizer, epochs):
    for i in range(epochs):
        for seq, labels in data_loader:
            optimizer.zero_grad()
            y_pred = model(seq).squeeze()  # forward pass, then drop the size-1 dimensions
            single_loss = loss_function(y_pred, seq)
            single_loss.backward()
            optimizer.step()
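Since the Adam state is never shared anyway, an equally valid variant (a sketch of mine, not from the original post; train_worker and lr are made-up names) builds the optimizer inside each worker. Only model has to come from the parent, because it is the parameter storage, placed in shared memory by model.share_memory(), that all workers update together:

def train_worker(model, data_loader, loss_function, lr, epochs):
    # Each worker owns its optimizer; the parameters it steps still live in
    # shared memory because model.share_memory() is called in the parent
    # before the workers are started.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for _ in range(epochs):
        for seq, labels in data_loader:
            optimizer.zero_grad()
            loss = loss_function(model(seq).squeeze(), seq)
            loss.backward()
            optimizer.step()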
Start training (this is where the multiprocessing comes in!)
if __name__ == '__main__':
    model = LstmFcAutoEncoder()
    x, y = get_train_data()
Here the data is wrapped in a DataLoader; this is optional (see the sketch after the setup code below for the alternative).
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(x, y),  # data wrapped in Data.TensorDataset(); tensors of any dimensionality are fine
        batch_size=20,  # batch size
        shuffle=True,  # whether to shuffle the data (shuffling is usually better)
        num_workers=3,  # subprocesses used to load the data
    )
    # The usual training trio
    loss_function = nn.MSELoss()  # loss
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer
    epochs = 150
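As promised above, a rough sketch of mine (assuming unshuffled batches are acceptable) of what skipping the DataLoader could look like: slice x and y into fixed-size chunks and hand the resulting list to train() in place of train_loader, since train() only needs something it can iterate for (seq, labels) pairs.

    # Hypothetical replacement for the DataLoader: plain, unshuffled mini-batches
    batch_size = 20
    manual_batches = [(x[i:i + batch_size], y[i:i + batch_size])
                      for i in range(0, len(x), batch_size)]
    # train(model, manual_batches, loss_function, optimizer, epochs) would then
    # consume these (seq, labels) tuples exactly like it consumes train_loader.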
Now for the multiprocessing setup:
    # Multiprocessing starts here
    num_processes = 4  # use 4 processes
    # NOTE: this is required for the ``fork`` method to work
    model.share_memory()
    processes = []
    for rank in range(num_processes):
        # 4 processes, each running 150 epochs, so the model is actually trained for 4 * 150 = 600 epochs!
        p = mp.Process(target=train, args=(model, train_loader, loss_function, optimizer, epochs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
If you want to squeeze every CPU core, change this to:
from multiprocessing import cpu_count
num_processes = cpu_count()
Verifying that the parameters really are shared across processes
- Open a system monitor and check that several processes are busy on the CPU (of course they are).
- Verify the parameters: while debugging, set a breakpoint (the optimizer.step() line inside train() is a good spot), let one process run a few iterations, then inspect the model parameters seen by each of the other processes (a debugger-free sketch follows below):
print(model.encoder_fc.state_dict())  # inspect the parameters of the model's encoder_fc layer
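If you would rather not attach a debugger, here is a rough alternative of mine (train_verbose is a made-up name, not from the original post): have every worker print a small slice of one shared parameter along with its rank. Because the parameters sit in shared memory, all ranks should report the same, steadily changing values. Launch it with mp.Process(target=train_verbose, args=(rank, model, train_loader, loss_function, optimizer, epochs)).

def train_verbose(rank, model, data_loader, loss_function, optimizer, epochs):
    step = 0
    for _ in range(epochs):
        for seq, labels in data_loader:
            optimizer.zero_grad()
            loss = loss_function(model(seq).squeeze(), seq)
            loss.backward()
            optimizer.step()
            step += 1
            if step % 50 == 0:
                # Every rank prints (roughly) the same numbers, since all of them
                # update the one copy of the weights held in shared memory.
                sample = model.encoder_fc.weight.data[0, :3]
                print(f"rank {rank}, step {step}, encoder_fc weight sample: {sample}")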
Complete code
import torch.multiprocessing as mp
import torch
import torch.nn as nn
import torch.utils.data as Data
def get_train_data():
    """Build the training data. Random numbers are used here, which is why the final results will not be good."""
    def get_tensor_from_pd(dataframe_series) -> torch.Tensor:
        return torch.tensor(data=dataframe_series.values)

    import numpy as np
    import pandas as pd
    from sklearn import preprocessing
    # Generate random x, min-max normalize it, wrap it in a DataFrame, then convert it to a tensor
    df = pd.DataFrame(data=preprocessing.MinMaxScaler().fit_transform(np.random.randint(0, 10, size=(2000, 300))))
    y = pd.Series(list(range(2000)))
    return get_tensor_from_pd(df).float(), get_tensor_from_pd(y).float()
class LstmFcAutoEncoder(nn.Module):
    def __init__(self, input_layer=300, hidden_layer=100, batch_size=20):
        super(LstmFcAutoEncoder, self).__init__()
        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.batch_size = batch_size
        self.encoder_lstm = nn.LSTM(self.input_layer, self.hidden_layer, batch_first=True)
        self.encoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.decoder_lstm = nn.LSTM(self.hidden_layer, self.input_layer, batch_first=True)
        self.decoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.relu = nn.ReLU()

    def forward(self, input_x):
        input_x = input_x.view(len(input_x), 1, -1)
        # encoder
        encoder_lstm, (n, c) = self.encoder_lstm(input_x,
                                                 # hidden/cell state shape: (n_layers, batch, hidden_size)
                                                 (torch.zeros(1, self.batch_size, self.hidden_layer),
                                                  torch.zeros(1, self.batch_size, self.hidden_layer)))
        encoder_fc = self.encoder_fc(encoder_lstm)
        encoder_out = self.relu(encoder_fc)
        # decoder
        decoder_fc = self.relu(self.decoder_fc(encoder_out))
        decoder_lstm, (n, c) = self.decoder_lstm(decoder_fc,
                                                 (torch.zeros(1, self.batch_size, self.input_layer),
                                                  torch.zeros(1, self.batch_size, self.input_layer)))
        return decoder_lstm.squeeze()
def train(model, data_loader, loss_function, optimizer, epochs):
    for i in range(epochs):
        for seq, labels in data_loader:
            optimizer.zero_grad()
            y_pred = model(seq).squeeze()  # forward pass, then drop the size-1 dimensions
            single_loss = loss_function(y_pred, seq)
            single_loss.backward()
            optimizer.step()
if __name__ == '__main__':
    model = LstmFcAutoEncoder()
    x, y = get_train_data()
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(x, y),  # data wrapped in Data.TensorDataset(); tensors of any dimensionality are fine
        batch_size=20,  # batch size
        shuffle=True,  # whether to shuffle the data (shuffling is usually better)
        num_workers=3,  # subprocesses used to load the data
    )
    # The usual training trio
    loss_function = nn.MSELoss()  # loss
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer
    epochs = 150
    # Multiprocessing starts here
    # from multiprocessing import cpu_count
    # num_processes = cpu_count()
    num_processes = 4  # use 4 processes
    # NOTE: this is required for the ``fork`` method to work
    model.share_memory()
    processes = []
    for rank in range(num_processes):
        # 4 processes, each running 150 epochs, so the model is actually trained for 4 * 150 = 600 epochs!
        p = mp.Process(target=train, args=(model, train_loader, loss_function, optimizer, epochs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()