一、VAE模型训练
1.1 训练集
1.1.1 生成自定大小的矩阵(二进制字符串对应的矩阵)(扩充后)
1.1.2 根据矩阵生成对应像素的训练图片
1.2 自定义与加载训练集
# Collect the full path of every file in the training directory.
for fname in os.listdir(train_dir):
    train_images.append(os.path.join(train_dir, fname))

# Image preprocessing pipeline.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    # Normalize maps pixel values from [0, 1] into the [-1, 1] range.
    transforms.Normalize(mean=0.5, std=0.5),
])
# 自定义训练集
class MyTrainset(Dataset):
    """Dataset over the image paths collected in the module-level ``train_images``.

    Each item is opened, converted to 1-bit ('1') mode, resized to
    (IMG_W, IMG_H) and passed through ``transform`` (defaults to the
    module-level ``img_transform``). No labels are returned.
    """

    def __init__(self, transform=img_transform, target_transform=None):
        # NOTE(review): the old class-level `imgs = []` was dead state shadowed
        # by this instance attribute, so it has been removed.
        self.imgs = train_images  # list of image file paths
        self.transform = transform
        self.target_transform = target_transform  # kept for API compatibility; unused here

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):
        path = self.imgs[index]
        # Context manager ensures the underlying file handle is closed promptly.
        with Image.open(path) as opened:
            img = opened.convert('1').resize((IMG_W, IMG_H))
        if self.transform is not None:
            img = self.transform(img)
        return img
trainSet = MyTrainset() # instantiate the custom dataset
dataloader = DataLoader(dataset=trainSet, batch_size=batch_size, shuffle=True) # load the dataset in shuffled mini-batches
1.2 VAE网络代码编写
1.2.1 定义网络层结构
class VAE(nn.Module):
    """Fully-connected variational autoencoder for flattened 1296-pixel (36x36) images."""

    def __init__(self):
        super(VAE, self).__init__()
        # Encoder: 1296 -> 512 -> two z_dim heads (mean and log-variance).
        self.fc1 = nn.Linear(1296, 512)
        self.fc21 = nn.Linear(512, z_dim)  # mu head
        self.fc22 = nn.Linear(512, z_dim)  # logvar head
        # Decoder: z_dim -> 512 -> 1296.
        self.fc3 = nn.Linear(z_dim, 512)
        self.fc4 = nn.Linear(512, 1296)

    def encode(self, x):
        """Map a flattened image batch to (mu, logvar) of the latent Gaussian."""
        h1 = F.leaky_relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def decode(self, z):
        """Map latent codes back to flattened images in [-1, 1]."""
        dh1 = F.leaky_relu(self.fc3(z))
        # torch.tanh replaces the deprecated F.tanh.
        return torch.tanh(self.fc4(dh1))

    def reparametrize(self, mu, logvar):
        """Sample z = mu + std * eps, eps ~ N(0, I) (reparameterization trick)."""
        std = logvar.mul(0.5).exp_()
        # randn_like creates eps on the same device/dtype as std, replacing the
        # deprecated Variable / torch.cuda.FloatTensor construction (which also
        # always targeted CUDA device 0 whenever CUDA was available).
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar
1.2.2 定义损失函数
def loss_function(recon_x, x, mu, logvar):
    """Reconstruction loss plus KL divergence to the unit-Gaussian prior."""
    recon_loss = reconstruction_function(recon_x, x)  # mse loss
    # KL(N(mu, sigma^2) || N(0, 1)) = -0.5 * sum(1 + logvar - mu^2 - exp(logvar))
    kld = torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) * (-0.5)
    return recon_loss + kld
1.2.3 定义优化器
# Adam optimizer over all VAE parameters.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# form_results() presumably creates and returns the output directories for
# checkpoints, logs and original/reconstructed images — confirm in its definition.
saved_model_path, log_path, image_ori_path, image_recon_path = form_results()
1.2.4 核心训练过程
# Core training loop: one pass per epoch over the dataloader, logging the
# per-pixel loss and saving sample images at the last full batch of each epoch.
for epoch in range(n_epochs):
    model.train()
    train_loss = 0
    # Number of full batches per epoch — loop-invariant, hoisted out of the batch loop.
    all_batchs = int(len(dataloader.dataset) / batch_size)
    for batch_idx, data in enumerate(dataloader):
        img = data.view(data.size(0), -1)  # flatten each image to a vector
        if torch.cuda.is_available():
            img = img.cuda()
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(img)
        loss = loss_function(recon_batch, img, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        if batch_idx % 100 == 0:
            # Report per-pixel loss for this batch.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch,
                batch_idx * len(img),
                len(dataloader.dataset), 100. * batch_idx / len(dataloader),
                loss.item() / (len(img) * IMG_H * IMG_W)))
        if batch_idx + 1 == all_batchs:
            # Save a grid of the first 16 originals and their reconstructions.
            ori_data = to_img(img[:16].data)
            save_image(ori_data, image_ori_path + f'ori_image_{epoch}_{batch_idx}.png')
            recon_data = to_img(recon_batch[:16].data)
            save_image(recon_data, image_recon_path +
                       f'recon_image_{epoch}_{batch_idx}.png')
    print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / (len(dataloader.dataset) * IMG_H * IMG_W)))
    # Append "<epoch> <average per-pixel loss>" to the loss log; the `with`
    # block already closes the file, so the old explicit close() was redundant.
    with open(log_path + 'loss.data', 'a') as file:
        file.write(str(epoch))
        file.write(" ")
        file.write(str(train_loss / (len(dataloader.dataset) * IMG_H * IMG_W)))
        file.write("\n")
# Persist the trained weights once training has finished.
torch.save(model.state_dict(), saved_model_path + 'vae.pth')
1.3 模型测试
1.3.1 模型测试精度对比
# Change this path to select which trained checkpoint to evaluate.
path_model = results_path + '/2022_11_11_15_44_54_(10_0.001_32_500_36_36)_Adversarial_Autoencoder/Saved_models/vae.pth'
# Instantiate the network structure
model = VAE()
# Load the saved weights (the second positional argument is `strict`)
model.load_state_dict(torch.load(path_model), True)
测试结果
第一张图为刚开始训练时的单通道图像,第二张为训练结束解码后的图像,第三张为原图
及误差分析对比图
从上到下分别为不同输出维度和训练批大小对应模型的图像,即z10_b32,z8_b32,z6_b32,z10_b64,z8_b64,z6_b64,可见精度最高的为输出维度10,批大小为32
1.3.2 随机张量生成
- 1.随机生成一定数量的张量
random_sample = torch.randn(1, 10) # a 1x10 latent tensor; with four args this would be [batch, channel, height, width]
- 2.输入模型进行解码,得到解码后的张量值
- 3.将张量进行归一化处理,将范围控制在(0,1)之间
tensor_decode = model.decode(random_sample).clamp(0, 1) # clamp the decoded output into [0, 1]; shape torch.Size([1, 1296])
- 4.矩阵压缩,将原先扩大的矩阵进行还原
def transferMatrix(matrix_demo, broad):
    """Downsample a matrix by majority vote over non-overlapping broad x broad blocks.

    Args:
        matrix_demo: 2-D numpy matrix/array whose dimensions are multiples of `broad`.
        broad: side length of each square block.

    Returns:
        Flattened (row-major) list with one 0/1 entry per block: 1 when the
        block's mean is >= 0.5, otherwise 0.
    """
    height = int(matrix_demo.shape[0] / broad)
    width = int(matrix_demo.shape[1] / broad)
    new_list = []
    for index1 in range(height):
        for index2 in range(width):
            count = 0
            # Sum the broad x broad block. The upper bounds were previously
            # hard-coded to `+ 6`, which silently broke any broad != 6.
            for index3 in range(index1 * broad, (index1 + 1) * broad):
                for index4 in range(index2 * broad, (index2 + 1) * broad):
                    count += matrix_demo[index3, index4]
            # Majority vote on the block mean.
            new_list.append(1 if count / (broad * broad) >= 0.5 else 0)
    return new_list
- 5.张量转换为二进制字符串(遍历二维矩阵)
list_tensor_decode = tensor_decode.tolist()
# Output path; `i` is presumably a sample index defined by the enclosing loop — confirm.
path_init = log_path + 'test_{}.npy'.format(i)
# Reshape the flat 1296-value decode into a 36x36 matrix.
matrix = np.mat(list_tensor_decode).reshape(36, 36)
trans_matrix = transferMatrix(matrix, 6) # compress the 36x36 matrix back to a flattened 6x6 binary pattern
np.save(path_init, trans_matrix)
print(np.load(path_init))
1.3.3 张量解码测试
# 1.生成张量
1.1.1 随机生成
random_sample = torch.randn(1, 10) # random 1x10 latent tensor; torch.Size([1, 10])
1.1.2 数组转张量
random_sample = torch.tensor(arr) # convert an existing array `arr` into a latent tensor
保存在同一目录下
二、贝叶斯优化
2.1 基本代码编写
2.1.1 定义需要优化的目标函数
def black_box_function(A, B, C, D, E, F, G, H, I, J):
    """Function with unknown internals we wish to maximize.

    This is just serving as an example, for all intents and
    purposes think of the internals of this function, i.e.: the process
    which generates its output values, as unknown.
    """
    # Same expression as before, split into named steps (left-to-right order preserved).
    base = np.exp(A + log(B)) * (C ** 2 - D ** 3)
    scaled = base / np.exp(E + F)
    damped = scaled * G ** 3 / np.exp(H)
    return damped * (math.sqrt(I) * J)
# 这里的目标函数为自定义,目的是为了测试贝叶斯的搜索极值的收敛能力
2.1.2 定义优化函数(优化器)
def opt_function():
    """Run Bayesian optimization over the ten inputs of black_box_function."""
    # Search bounds for each input parameter.
    pbounds = {
        'A': (-3, 3),
        'B': (0.1, 3),
        'C': (-3, 3),
        'D': (-3, 3),
        'E': (-3, 3),
        'F': (-3, 3),
        'G': (-3, 3),
        'H': (-3, 3),
        'I': (1, 3),
        'J': (-3, 3),
    }

    # Thin wrapper forwarding the suggested parameters to the target function.
    def black_box(A, B, C, D, E, F, G, H, I, J):
        return black_box_function(A=A, B=B, C=C, D=D, E=E, F=F,
                                  G=G, H=H, I=I, J=J)

    optimizer = BayesianOptimization(
        f=black_box,
        pbounds=pbounds,
        verbose=2,  # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
        random_state=1  # fixed seed so repeated runs probe the same points
    )
    optimizer.maximize(
        init_points=0,  # number of random warm-up probes
        n_iter=100,  # number of optimization iterations
        acq="ei"  # acquisition function: Expected Improvement
    )
    # Append the best observed point to the log file.
    with open('./Test_logs.json', 'a', encoding='utf-8', errors='replace') as f:
        f.write(str(optimizer.max))
2.2 黑盒函数
2.2.1 思路
具体思路:黑盒函数返回的目标值是贝叶斯优化的目标,即热导率大小,判断MD计算完毕后再进行热导率的计算-运行themal脚本,判断热导率文件是否生成再进行返回,并且判断热导率文件内是否有数字(设置随机种子,让选取的参数保持一致,预防断电后MD失败 )
- 1.贝叶斯优化出来的参数转为数组、数组转为张量 √
- 2.返回的值大小为读取的热导率大小,文件命名方式为输入的参数 √
- 3.追踪每次推荐的参数数组(debug后发现是在x_probe中,x_probe格式为字典形式)√
- 4.将VAE_test部分函数复制到VAE_model中并存入bayesian_optimization.py同级目录
- 5.在bayesian_optimization.py同级目录中的__init__.py中添加需要引入的函数,以便调用 √
2.2.2 PPE链结构分子重建
1.car文件生成
demo已完成,后续工作将保存的文件用return 地址的形式返回 √
2.car文件内原子排序 √
3.根据car文件生成mdf文件 √
1.ctrl+R 批量替换
4.将car文件变为正确格式 √
5.data文件生成 √
msi2lmp.exe 1111111111111111 -class 2 -frc ./pcff.frc -i
6.复制in文件 √
7.编写脚本生成单体长度为36的结构 √
8.lammps计算热导率 √
mpiexec -np 4 lmp_mpi -in TC.in
9.black_function函数修改返回值 √
10.实现全自动
- 1.首次运行随机产生张量
- 2.调用VAE模型进行解码
- 3.调用脚本将转码结果转换为字符串
- 4.调用脚本生成对应的data文件
- 5.运行MD后,调用脚本计算热导率
- 6.黑盒函数返回参数列表对应文件位置中的热导率值(批量解码测试集数据,确定pbounds设置范围)
- 7.贝叶斯网络推荐参数
重复2~7的过程