原文链接(Deformable DETR 论文): https://arxiv.org/abs/2010.04159
encoder_layer = MSDeformAttnTransformerEncoderLayer
MSDeformAttnTransformerEncoderLayer(
(self_attn): MSDeformAttn(
(sampling_offsets): Linear(in_features=256, out_features=192, bias=True)
(attention_weights): Linear(in_features=256, out_features=96, bias=True)
(value_proj): Linear(in_features=256, out_features=256, bias