**This should be a fairly complete record of this problem!**
## The problem
`RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same`
## Problem analysis
1. At first glance, the data has been moved to CUDA, but the model weights have not.
2. The obvious conclusion: the model was never moved with `model.to(device)`.
3. But even after adding `model.to(device)`, the **same error** persisted. (A minimal reproduction of the mismatch is sketched below.)
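For reference, here is a minimal sketch of how this device mismatch arises, using a hypothetical toy layer rather than the actual model: the input lives on the GPU while the layer's weights stay on the CPU.
```python
import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, kernel_size=3)         # weights are created on the CPU
if torch.cuda.is_available():
    x = torch.randn(1, 3, 32, 32).cuda()      # input is moved to the GPU
    # conv(x) would raise:
    # RuntimeError: Input type (torch.cuda.FloatTensor) and weight type
    # (torch.FloatTensor) should be the same
```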
### Supplement: ways to move a model/tensor onto the GPU
**Model:**
> 1. `model = model.cuda()`
> 2. `model = model.to('cuda')`
> 3. `model.cuda()` (works in place, reassignment optional)

**Tensor:**
> 1. `tensor = tensor.cuda()`
> 2. `tensor = tensor.to("cuda")`
> 3. `device = torch.device("cuda" if torch.cuda.is_available() else "cpu")`
>    `tensor = tensor.to(device)`
Note that `Module.to()` is an in-place method, while `Tensor.to()` is not: it returns a new tensor and leaves the original unchanged, so the result must be assigned to a variable.
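A quick sketch of that distinction (nothing here beyond standard PyTorch calls):
```python
import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = nn.Linear(4, 2)
model.to(device)                        # in place: the model's parameters are moved
print(next(model.parameters()).device)

t = torch.zeros(4)
t.to(device)                            # returns a NEW tensor; t itself is unchanged
print(t.device)                         # still cpu
t = t.to(device)                        # must reassign to keep the moved tensor
print(t.device)
```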
## Solution 2: check where the model and the data actually live
- **Model**:

Method 1:
```python
print(next(model.parameters()).is_cuda)
```
Method 2 (note that `nn.Module` has no `is_cuda` attribute, so check a parameter instead):
```python
model = model.to('cuda')  # move the model to the GPU
if next(model.parameters()).is_cuda:
    print("Model is on the GPU")
else:
    print("Model is not on the GPU")

# Or check every parameter individually:
for name, param in model.named_parameters():
    if param.is_cuda:
        print(f"Parameter {name} is on the GPU")
    else:
        print(f"Parameter {name} is not on the GPU")
```
- **Data**:
```python
print(tensor.device)
```
## Solution 3: the model wraps conv layers in a plain Python list, or holds unregistered learnable parameters
`model.to(device)` only moves submodules and parameters that are registered on the module; layers stored in a plain Python `list` (rather than `nn.ModuleList`) are invisible to it. See, for example, this post: [model.to(device)无法将自定义层的tensor转移到指定设备\_pyxiea的博客-CSDN博客](https://blog.csdn.net/xpy870663266/article/details/104071765). A sketch of the trap and the fix follows below.
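A minimal sketch (hypothetical layer sizes) of the plain-list trap and the `nn.ModuleList` fix:
```python
import torch
import torch.nn as nn

class Broken(nn.Module):
    def __init__(self):
        super().__init__()
        # Plain list: .to(device) will NOT move these layers
        self.convs = [nn.Conv2d(3, 3, 3, padding=1) for _ in range(2)]

class Fixed(nn.Module):
    def __init__(self):
        super().__init__()
        # nn.ModuleList registers the layers, so .to(device) moves them
        self.convs = nn.ModuleList(nn.Conv2d(3, 3, 3, padding=1) for _ in range(2))

    def forward(self, x):
        for conv in self.convs:
            x = conv(x)
        return x

if torch.cuda.is_available():
    broken = Broken().to("cuda")
    print(broken.convs[0].weight.is_cuda)   # False: left behind on the CPU
    fixed = Fixed().to("cuda")
    print(fixed.convs[0].weight.is_cuda)    # True
```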
## Solution 4: a new module is instantiated inside `forward` and never moved with `to(device)`
Reason: a freshly constructed module lives on the CPU by default, so unless it is moved with `to(device)` it will clash with GPU inputs.
Either construct it in `__init__` (so that `model.to(device)` moves it along with everything else) or call `to(device)` on it explicitly.
The proper way is to construct it in `__init__`.
**Wrong:**
```python
class MaNet(nn.Module):
    def __init__(self):
        super(MaNet, self).__init__()
        self.first_channel = 256
        self.layer0 = nn.Sequential(
            Conv_3x3(8, 32),
            Conv_3x3(32, 3),
        )
        self.layer1 = Conv_7x7(3, 64)
        self.layer2 = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2),
            resnet18(64)
        )
        self.double_conv1 = nn.Sequential(
            Conv_3x3(self.first_channel, self.first_channel // 2),  # 256, 128
            Conv_3x3(self.first_channel // 2, self.first_channel // 2),
        )
        self.double_conv2 = nn.Sequential(
            Conv_3x3(self.first_channel // 2, self.first_channel // 4),
            Conv_3x3(self.first_channel // 4, self.first_channel // 4),
        )
        self.double_conv3 = nn.Sequential(
            Conv_3x3(self.first_channel // 2, self.first_channel // 4),
            Conv_3x3(self.first_channel // 4, self.first_channel // 4),
        )

    def forward(self, x):
        x0 = self.layer0(x)
        x1 = self.layer1(x0)
        x2, x3, x4 = self.layer2(x1)
        # BUG: these modules are constructed inside forward, so they live
        # on the CPU and are never moved by model.to(device)
        res_x1 = Ds1()(x1)
        res_x2 = Ds2()(x2)
        res_x3 = Ds3()(x3)
        # first upsample
        up_x4 = F.interpolate(x4, scale_factor=2, mode="nearest")
        up_x4 = Conv_3x3(256, 128)(up_x4)        # BUG: same problem
        com_x3 = torch.concat((x3, up_x4), dim=1)
        out_com_x3 = self.double_conv1(com_x3)
        # second upsample
        out_com_x3 = F.interpolate(out_com_x3, scale_factor=2, mode="nearest")
        out_com_x3 = Conv_3x3(128, 64)(out_com_x3)  # BUG: same problem
        com_x2 = torch.concat((x2, out_com_x3), dim=1)
        out_com_x2 = self.double_conv2(com_x2)
        # third upsample
        out_com_x2 = F.interpolate(out_com_x2, scale_factor=2, mode="nearest")
        up_x2 = Conv_3x3(64, 64)(out_com_x2)     # BUG: same problem
        com_x1 = torch.concat((x1, up_x2), dim=1)
        out_com_x1 = self.double_conv3(com_x1)
        # the last upsample
        out_com_x1 = F.interpolate(out_com_x1, scale_factor=2, mode="nearest")
        return res_x1, res_x2, res_x3, OutputBlock()(out_com_x1)  # BUG: same problem
```
`OutputBlock()` above is instantiated fresh inside `forward` and never moved with `to(device)`; the same goes for `Ds1()`, `Ds2()`, `Ds3()`, and the inline `Conv_3x3(...)` calls.
**Correct version:**
```python
class MaNet(nn.Module):
    def __init__(self):
        super(MaNet, self).__init__()
        self.first_channel = 256
        self.layer0 = nn.Sequential(
            Conv_3x3(8, 32),
            Conv_3x3(32, 3),
        )
        self.layer1 = Conv_7x7(3, 64)
        self.layer2 = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2),
            resnet18(64)
        )
        self.double_conv1 = nn.Sequential(
            Conv_3x3(self.first_channel, self.first_channel // 2),  # 256, 128
            Conv_3x3(self.first_channel // 2, self.first_channel // 2),
        )
        self.double_conv2 = nn.Sequential(
            Conv_3x3(self.first_channel // 2, self.first_channel // 4),
            Conv_3x3(self.first_channel // 4, self.first_channel // 4),
        )
        self.double_conv3 = nn.Sequential(
            Conv_3x3(self.first_channel // 2, self.first_channel // 4),
            Conv_3x3(self.first_channel // 4, self.first_channel // 4),
        )
        # FIX: construct every submodule here so model.to(device) moves them all
        self.ds1 = Ds1()
        self.ds2 = Ds2()
        self.ds3 = Ds3()
        self.conv3x3_1 = Conv_3x3(256, 128)
        self.conv3x3_2 = Conv_3x3(128, 64)
        self.conv3x3_3 = Conv_3x3(64, 64)
        self.outblock = OutputBlock()

    def forward(self, x):
        # print(f"x:{x.device}")
        x0 = self.layer0(x)
        # print(f"x0:{x0.device}")
        x1 = self.layer1(x0)
        # print(f"x1:{x1.device}")
        x2, x3, x4 = self.layer2(x1)
        # print(f"x2:{x2.device}")
        # print(f"x3:{x3.device}")
        # print(f"x4:{x4.device}")
        res_x1 = self.ds1(x1)
        res_x2 = self.ds2(x2)
        res_x3 = self.ds3(x3)
        # first upsample
        up_x4 = F.interpolate(x4, scale_factor=2, mode="nearest")
        up_x4 = self.conv3x3_1(up_x4)
        com_x3 = torch.concat((x3, up_x4), dim=1)
        out_com_x3 = self.double_conv1(com_x3)
        # second upsample
        out_com_x3 = F.interpolate(out_com_x3, scale_factor=2, mode="nearest")
        out_com_x3 = self.conv3x3_2(out_com_x3)
        com_x2 = torch.concat((x2, out_com_x3), dim=1)
        out_com_x2 = self.double_conv2(com_x2)
        # third upsample
        out_com_x2 = F.interpolate(out_com_x2, scale_factor=2, mode="nearest")
        up_x2 = self.conv3x3_3(out_com_x2)
        com_x1 = torch.concat((x1, up_x2), dim=1)
        out_com_x1 = self.double_conv3(com_x1)
        # the last upsample
        out_com_x1 = F.interpolate(out_com_x1, scale_factor=2, mode="nearest")
        return res_x1, res_x2, res_x3, self.outblock(out_com_x1)
```
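With every submodule registered in `__init__`, a single `.to(device)` now moves the whole model. A quick verification sketch, assuming the project's helper blocks (`Conv_3x3`, `Conv_7x7`, `resnet18`, `Ds1`/`Ds2`/`Ds3`, `OutputBlock`) are defined:
```python
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MaNet().to(device)
# every registered parameter should now report the same device as the input
assert all(p.device.type == device.type for p in model.parameters())
```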
Finally, to be honest, this came down to gaps in my fundamentals, which is frustrating; the problem held me up for a long time. Thanks to 代院's patient, step-by-step guidance, I finally learned how to track down this kind of error!
```text
The error message above pointed at the Ds1() module, but debugging Ds1 on its own worked fine. At first I focused on Conv_3x3, but on closer inspection the Conv2d inside my Conv_3x3 just calls the stock interface, nothing fancy; I also checked the input and output dtypes in Conv_3x3 and they were already cuda.float. Only after reading the second post did I realize the real cause was instantiating new modules inside forward. Ridiculous. [facepalm]
```
Thanks also to the earlier posts that helped:
[model.to(device)无法将自定义层的tensor转移到指定设备\_pyxiea的博客-CSDN博客](https://blog.csdn.net/xpy870663266/article/details/104071765)
[Pytorch避坑之:RuntimeError: Input type(torch.cuda.FloatTensor) and weight type(torch.FloatTensor) shoul\_暖仔会飞的博客-CSDN博客](https://blog.csdn.net/qq_42902997/article/details/122594017)