Table of Contents
1. python build_dataset.py $crop_num $pairs_num $raw_data_path
No module named 'pytorch_lightning.utilities.distributed'
NotImplementedError: No operator found for `memory_efficient_attention_forward` with inputs:
Panodiff
The model and the data are both included in the provided archive.
Data:
1 panorama and 50 views cropped from it; the crops have no distortion.
Algorithm steps:
1. python build_dataset.py $crop_num $pairs_num $raw_data_path (an example invocation follows step 2 below)
2. Relative Pose Prediction
GPU=... # Define your GPU Device
save_path=data/pred_results.npy # Define the path for saved predictions
python pred_rotations.py \
--config=config.yaml \
--classification_model_path=ckpts/stage1_classification_ckpt.pt \
--overlap_regression_model_path=ckpts/stage2_overlap_ckpt.pt \
--nonoverlap_regression_model_path=ckpts/stage2_nonoverlap_ckpt.pt \
--gpu=$GPU \
--save_path=$save_path
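For step 1, a concrete invocation might look like the line below; the 50 matches the crop count described above, while the pair count and the data folder are hypothetical placeholders (the argument meanings are inferred from the variable names, not documented):

python build_dataset.py 50 100 ./raw_data  # hypothetical: 50 crops, 100 pairs, raw data in ./raw_data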
Errors:
No module named 'pytorch_lightning.utilities.distributed'
Fix: pip install pytorch-lightning==1.5.0 (this module was removed in newer PyTorch Lightning releases, so pin an older one).
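If downgrading is undesirable, an alternative is a small compatibility shim run before the PanoDiff imports. This is only a sketch, assuming PyTorch Lightning >= 1.9, where the rank-zero helpers live in pytorch_lightning.utilities.rank_zero; verify that the symbols the code actually imports exist there:

import sys
# Newer PyTorch Lightning removed pytorch_lightning.utilities.distributed;
# alias the module that now hosts the rank-zero helpers under the old name.
import pytorch_lightning.utilities.rank_zero as rank_zero
sys.modules['pytorch_lightning.utilities.distributed'] = rank_zero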
NotImplementedError: No operator found for `memory_efficient_attention_forward` with inputs:
Fix: see the CSDN post "NotImplementedError: No operator found for `memory_efficient_attention_forward` with inputs". The error is raised by xFormers when no memory-efficient attention kernel matches the installed PyTorch/CUDA build; installing an xformers version built against the local torch release usually resolves it.
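Before reinstalling anything, it can help to confirm which torch / CUDA / xformers combination is actually installed, since the memory-efficient attention kernels must match the local PyTorch and CUDA build. A minimal check:

import torch
import xformers
print('torch   :', torch.__version__, '| CUDA:', torch.version.cuda)
print('xformers:', xformers.__version__)
print('GPU     :', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'none')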
Annotated code (pred_rotations.py):
import os
import yaml
import torch
import argparse
import numpy as np
from math import pi
from tqdm import tqdm
from torch.backends import cudnn
from dataset import get_test_loader  # test-set data loader
from models import OverlapClassificationModel, RotationPredictionModel  # overlap classification and rotation-regression models
from utils.compute_utils import *  # rotation / geodesic math helpers
def get_args():
    # Parse command-line arguments.
    parser = argparse.ArgumentParser(description='Test')
    parser.add_argument('--config', type=str, default="config.yaml", help='The configuration file.')
    parser.add_argument('--save_path', type=str, default='./pred_results.npy', help='The path to save the prediction results.')
    # GPU settings
    parser.add_argument('--gpu', default=0, type=int, help='GPU id to use.')
    # Pretrained checkpoint paths
    parser.add_argument('--classification_model_path', default="ckpts/stage1_classification_ckpt.pt", type=str, help="Pretrained checkpoint for classification")
    parser.add_argument('--overlap_regression_model_path', default="ckpts/stage2_overlap_ckpt.pt", type=str, help="Pretrained checkpoint for overlap regression")
    parser.add_argument('--nonoverlap_regression_model_path', default="ckpts/stage2_nonoverlap_ckpt.pt", type=str, help="Pretrained checkpoint for nonoverlap regression")
    # NOTE: default=True combined with action='store_true' means this flag is effectively always on.
    parser.add_argument('--val_angle', default=True, action='store_true', help="Evaluate yaw and pitch error")
    args = parser.parse_args()

    def dict2namespace(config):
        # Recursively convert a dict into an argparse.Namespace for attribute-style access.
        namespace = argparse.Namespace()
        for key, value in config.items():
            if isinstance(value, dict):
                new_value = dict2namespace(value)
            else:
                new_value = value
            setattr(namespace, key, new_value)
        return namespace

    # Parse the YAML configuration file.
    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)
    config = dict2namespace(config)
    return args, config
def main_worker(cfg, args):
    # Basic setup
    cudnn.benchmark = True  # speeds things up when input shapes are fixed

    # Test data loader
    _, test_loader = get_test_loader(cfg.data)
    gt_metadata = np.load(cfg.data.pairs_file, allow_pickle=True).item()  # load the ground-truth pair metadata

    # Initialize the models
    overlap_classification_model = OverlapClassificationModel(cfg.models.overlap_classification)
    overlap_classification_model.load(args.classification_model_path)
    overlap_classification_model.set_eval()  # switch to evaluation mode

    overlap_regression_model = RotationPredictionModel(cfg.models.overlap_rotation_regression_model)
    overlap_regression_model.load(args.overlap_regression_model_path)
    overlap_regression_model.set_eval()

    nonoverlap_regression_model = RotationPredictionModel(cfg.models.nonoverlap_rotation_regression_model)
    nonoverlap_regression_model.load(args.nonoverlap_regression_model_path)
    nonoverlap_regression_model.set_eval()

    # Run the predictions
    with torch.no_grad():  # no gradients needed at inference time
        pred_overlap_status = overlap_classification_model(test_loader)  # predicted overlap / non-overlap status
        overlap_rots = overlap_regression_model(test_loader)  # rotation predictions for the overlapping case
        nonelap_rots = nonoverlap_regression_model(test_loader)  # rotation predictions for the non-overlapping case

    # Blend the two rotation predictions according to the predicted overlap status
    pred_overlap_status = pred_overlap_status.cpu()
    rot_x = overlap_rots['rot_x'].cpu() * pred_overlap_status + nonelap_rots['rot_x'].cpu() * (1 - pred_overlap_status)
    rot_y1 = overlap_rots['rot_y1'].cpu() * pred_overlap_status + nonelap_rots['rot_y1'].cpu() * (1 - pred_overlap_status)
    rot_y2 = overlap_rots['rot_y2'].cpu() * pred_overlap_status + nonelap_rots['rot_y2'].cpu() * (1 - pred_overlap_status)

    # Compute the evaluation metrics
    print("Computing metrics...")
    if args.val_angle:
        gt_rmat_array = None
        out_rmat_array = None
        overlap_all = None
        all_res = {}
        for index in tqdm(range(len(gt_metadata))):  # iterate over the ground-truth pairs
            # Ground-truth rotation angles
            rotation_x1 = torch.tensor(gt_metadata[index]['img1']['x'])[..., None]
            rotation_x2 = torch.tensor(gt_metadata[index]['img2']['x'])[..., None]
            rotation_y1 = torch.tensor(gt_metadata[index]['img1']['y'])[..., None]
            rotation_y2 = torch.tensor(gt_metadata[index]['img2']['y'])[..., None]
            overlap_status = torch.tensor(gt_metadata[index]['is_overlap'])[..., None].bool()
            # Ground-truth rotation matrix
            gt_rmat = compute_gt_rmat(rotation_x1, rotation_y1, rotation_x2, rotation_y2, 1)
            # Predicted rotation matrix
            pred_delta_x = rot_x[index][..., None]
            pred_rot_y1 = rot_y1[index][..., None]
            pred_rot_y2 = rot_y2[index][..., None]
            rt1 = compute_rotation_matrix_from_viewpoint(torch.zeros_like(pred_delta_x), pred_rot_y1.float() / 180 * pi - pi, 1).view(1, 3, 3)
            rt2 = compute_rotation_matrix_from_viewpoint(pred_delta_x.float() / 180 * pi - pi, pred_rot_y2.float() / 180 * pi - pi, 1).view(1, 3, 3)
            out_rmat = compute_rotation_matrix_from_two_matrices(rt2, rt1).view(1, 3, 3).cuda()
            # Accumulate the rotation matrices and overlap flags
            gt_rmat_array = gt_rmat if gt_rmat_array is None else torch.cat((gt_rmat_array, gt_rmat))
            out_rmat_array = out_rmat if out_rmat_array is None else torch.cat((out_rmat_array, out_rmat))
            overlap_all = overlap_status if overlap_all is None else torch.cat((overlap_all, overlap_status))

        # Geodesic distance (angular error, in degrees)
        geodesic_loss = compute_geodesic_distance_from_two_matrices(out_rmat_array.view(-1, 3, 3), gt_rmat_array.view(-1, 3, 3)) / pi * 180
        gt_distance = compute_angle_from_r_matrices(gt_rmat_array.view(-1, 3, 3))

        # Split the results into the overlapping and wide-baseline subsets
        geodesic_loss_overlap = geodesic_loss[overlap_all]
        geodesic_loss_widebaseline = geodesic_loss[~overlap_all]

        # Collect the final metrics
        res_error = {"gt_angle": gt_distance / pi * 180,
                     "rotation_geodesic_error_overlap": geodesic_loss_overlap,
                     "rotation_geodesic_error_widebaseline": geodesic_loss_widebaseline,
                     "rotation_geodesic_error": geodesic_loss}
        for k, v in res_error.items():
            v = v.view(-1).detach().cpu().numpy()
            if k == "gt_angle" or v.size == 0:
                continue
            mean = np.mean(v)
            count_10 = (v <= 10).sum(axis=0)
            percent_10 = np.true_divide(count_10, v.shape[0])
            all_res.update({k + '/mean': mean, k + '/10deg': percent_10 * 100})
        for k, v in all_res.items():
            print(k, ': ', v)

    # Save the predictions
    rot_x = (rot_x - 180) / 180 * torch.pi  # map angles from [0, 360) degrees to [-pi, pi) radians
    rot_y1 = (rot_y1 - 180) / 180 * torch.pi
    rot_y2 = (rot_y2 - 180) / 180 * torch.pi
    pred_meta = {}
    for index in range(0, len(gt_metadata)):
        one_pair = {}
        one_pair.update(gt_metadata[index])  # copy the pair's metadata
        # Overwrite the rotation angles with the predictions
        one_pair['img1']['y'] = rot_y1[index].item()
        one_pair['img2']['y'] = rot_y2[index].item()
        img2_x = one_pair['img1']['x'] + rot_x[index].item()
        # Wrap the angle back into [-pi, pi)
        if img2_x >= pi:
            img2_x -= 2 * pi
        elif img2_x < -pi:
            img2_x += 2 * pi
        one_pair['img2']['x'] = img2_x
        pred_meta[index] = one_pair

    # Save the predictions as an .npy file
    np.save(args.save_path, pred_meta, allow_pickle=True)


if __name__ == '__main__':
    # Parse command-line args and the config file
    args, cfg = get_args()
    # Select the GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    # Run the main worker
    main_worker(cfg, args)
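For reference, args.save_path ends up holding a pickled dict keyed by pair index, as built in main_worker above. A minimal sketch of reading it back, using the save_path defined in step 2:

import numpy as np

pred = np.load('data/pred_results.npy', allow_pickle=True).item()
pair = pred[0]
print(pair['img1']['x'], pair['img1']['y'])  # reference view: x kept from metadata, y predicted (radians)
print(pair['img2']['x'], pair['img2']['y'])  # second view: both overwritten by the predictions (radians)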
Single-image inference:
/shared_disk/users/lbg/project/Panodiff-main/public_demo.py
import os
os.chdir(os.path.dirname(os.path.abspath(__file__)))
from share import *
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader
from dataloaders.rota_dataset import SingleImageDataset, RotationDataset
from cldm.logger import ImageLogger
from cldm.model import create_model, load_state_dict
import time
import datetime
import torch
import cv2
import numpy as np
from torch.utils.data import Dataset
def read_mask(path, height, width):
    # Load a grayscale mask, resize it, and binarize to {0, 1} with a trailing channel axis.
    mask = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    mask = cv2.resize(mask, (width, height))
    mask = np.where(mask > 0, 1.0, 0).astype(np.float32)
    return np.expand_dims(mask, axis=-1)
class MyDataset(Dataset):
    def __init__(self, root_path='datasets/sun360_d1_t30000_v03000'):
        self.root_path = root_path
        self.height = 512
        self.width = 1024
        # self.height, self.width = 180, 320
        # self.mask = read_mask(os.path.join(self.root_path, 'mask.jpg'), self.height*2, self.width*2)
        self.mask = read_mask('/mnt/pfs/users/lbg/project/SD-T2I-360PanoImage/data/i2p-mask.jpg', self.height, self.width)
        # target_filled = np.zeros([self.height*2, self.width*2, 3])
        # target_filled[self.mask[...,0]>0] = cv2.resize(cv2.imread(os.path.join(self.root_path, 'nov.jpg')), (self.width, self.height)).reshape([-1,3])

    def __len__(self):
        return 1

    def __getitem__(self, idx):
        source_filename = '/shared_disk/users/lbg/project/Panodiff-main/assets/output_image.png'
        target_filename = '/shared_disk/users/lbg/project/Panodiff-main/assets/panorama.jpg'
        prompt = 'road with side tree'
        # source = cv2.imread(os.path.join(self.root_path, source_filename))
        target = cv2.imread(target_filename)
        target = cv2.resize(target, (self.width, self.height))
        target = cv2.cvtColor(target, cv2.COLOR_BGR2RGB)
        # Do not forget that OpenCV reads images in BGR order.
        source = cv2.imread(os.path.join("", target_filename))
        source = cv2.resize(source, (self.width, self.height))
        source = cv2.cvtColor(source, cv2.COLOR_BGR2RGB)
        # Normalize source images to [0, 1].
        source = source.astype(np.float32) / 255.0
        # Normalize target images to [-1, 1].
        # NOTE(wjh): only output rgb layers. Do we need to address the None part as 0?
        target = (target.astype(np.float32) / 127.5) - 1.0
        return dict(jpg=target, txt=prompt, hint=source, mask=self.mask)

        # Disabled legacy branch (unreachable: it sits after the return above).
        if 0:
            target_filled = np.zeros([self.height, self.width, 3])
            target_filled[self.mask[..., 0] > 0] = target.reshape([-1, 3])
            source = target_filled.copy()
            # Normalize source images to [0, 1].
            source = source.astype(np.float32) / 255.0
            # Normalize target images to [-1, 1].
            # NOTE(wjh): only output rgb layers. Do we need to address the None part as 0?
            target_filled = (target_filled.astype(np.float32) / 127.5) - 1.0
            # ugly fix
            target_filled = np.pad(target_filled, ((12, 12), (0, 0), (0, 0)), mode='constant', constant_values=-1)
            source = np.pad(source, ((12, 12), (0, 0), (0, 0)), mode='constant', constant_values=0)
            mask = np.pad(1 - self.mask, ((12, 12), (0, 0), (0, 0)), mode='constant', constant_values=0)
            return dict(jpg=target_filled, txt=prompt, hint=source, mask=mask)
# Configs
test=False
data_root_path = '/shared_disk/comfyui/models/panodiff_assets/sun360_example/raw_crops'
pair_path = '/shared_disk/comfyui/models/panodiff_assets/sun360_example/meta/sun360_example.npy'
#mask_image_path = '/data/chenziyu/myprojects/OmniDreamer/assets/90binarymask.png'
num_training = 500
exclude_360 = True
batch_size = 1
# Rotation Supervision
rotation_supervise = False
rotation_loss_lambda = 5e-4
roll_augment = False
roll_schedule = True
padding_augment = True
logger_freq = 1 #num_training // batch_size * 2
learning_rate = 1e-5
resume_path = '/shared_disk/comfyui/models/panodiff_assets/pretrained_models/norota_clean.ckpt'
# resume_path = 'datasets/ziyu_ckpt/RollAug/epoch=5-step=7499.ckpt'
NUM_GPUS = 1
N_acc = 2  # gradient accumulation factor
max_epochs = 20 * N_acc
sd_locked = True
only_mid_control = False
img_size = 512
log_path = 'logs_1210'
name_head = '231225_public'
current_time = datetime.datetime.now()
expname = name_head + current_time.strftime("%Y-%m-%d-%H:%M:%S")
expname = name_head  # override: drop the timestamp suffix
image_logger_path = os.path.join(log_path, expname)
# Create the model and load the pretrained weights. (This script moves the model
# to the GPU directly, rather than letting PyTorch Lightning handle placement.)
model = create_model('models/norota_inpaint.yaml').cuda()
model.load_state_dict(load_state_dict(resume_path, location='cuda:0'), strict=False)
model.learning_rate = learning_rate
model.sd_locked = sd_locked
model.only_mid_control = only_mid_control
model.use_gt_rots = True
model.padding_augment = padding_augment
model.down_scale = 1
model.roll_augment = roll_augment
model.roll_schedule = roll_schedule
model.deform_augment = False
# Misc
# test_dataset = SingleImageDataset(root=data_root_path,
# num_images=num_training,
# pairs_file=pair_path,
# height=img_size,
# Train=False)
test_dataset = MyDataset(root_path=data_root_path)
# test_dataset = RotationDataset(root=data_root_path,
# num_images=num_training,
# pairs_file=pair_path,
# height=img_size,
# Train=False,
# down_scale=1)
test_dataloader = DataLoader(test_dataset,
num_workers=1,
batch_size=batch_size,
shuffle=False)
tb_logger = TensorBoardLogger(
save_dir=log_path,
name=expname
)
image_callback = ImageLogger(batch_frequency=logger_freq,
save_dir=image_logger_path,
tb_logger=tb_logger)
model.eval()
device = model.device  # e.g. 'cuda:0'
# model.to(device)
with torch.no_grad():
    for b_idx, batch in enumerate(test_dataloader):
        # Move every tensor in the batch onto the model's device.
        for item in batch:
            if isinstance(batch[item], torch.Tensor):
                batch[item] = batch[item].to(device)
        # outputs = model(batch)
        # flip can be used to check whether the panorama's left and right edges connect
        image_callback.log_img(model, batch, batch_idx=b_idx, split="test", flip=False)
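To run the demo, a plain invocation should be enough; where exactly the outputs land is an assumption based on the save_dir passed to ImageLogger above:

python public_demo.py
# the logged panoramas are expected under logs_1210/231225_public (image_logger_path above)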