Per-layer dimensions of self.global_model (an ImageTextClassifier):

self.global_model=ImageTextClassifier(
  (img_proj): Sequential(
    (0): Linear(in_features=1280, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=128, out_features=128, bias=True)
  )
  (text_rnn): GRU(512, 128, batch_first=True, dropout=0.1)
  (fuse_att): FuseBaseSelfAttention(
    (att_fc1): Linear(in_features=128, out_features=512, bias=True)
    (att_pool): Tanh()
    (att_fc2): Linear(in_features=512, out_features=6, bias=True)
  )
  (classifier): Sequential(
    (0): Linear(in_features=768, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=64, out_features=8, bias=True)
  )
)

This model is a multimodal classifier: it combines image features and text features, fuses them with a self-attention mechanism, and produces the final prediction through a classifier head.
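For reference, below is a minimal PyTorch sketch that reproduces the printed module structure. The forward pass is an inference from the layer dimensions, not the original implementation: att_fc2 produces 6 attention heads, and 6 x 128 = 768 matches the classifier's input size, so the fused representation is presumably a concatenation of 6 attention-pooled 128-dimensional vectors.

import torch
import torch.nn as nn


class FuseBaseSelfAttention(nn.Module):
    def __init__(self, d_hid=128, d_att=512, n_heads=6):
        super().__init__()
        self.att_fc1 = nn.Linear(d_hid, d_att)
        self.att_pool = nn.Tanh()
        self.att_fc2 = nn.Linear(d_att, n_heads)

    def forward(self, x):
        # x: (B, T, 128) fused sequence of image and text features
        att = self.att_fc2(self.att_pool(self.att_fc1(x)))   # (B, T, 6) head scores
        att = torch.softmax(att, dim=1)                       # normalize over the sequence axis
        pooled = torch.einsum("bth,btd->bhd", att, x)         # (B, 6, 128) one vector per head
        return pooled.flatten(1)                              # (B, 768)


class ImageTextClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.img_proj = nn.Sequential(
            nn.Linear(1280, 128), nn.ReLU(), nn.Dropout(0.1), nn.Linear(128, 128)
        )
        self.text_rnn = nn.GRU(512, 128, batch_first=True, dropout=0.1)
        self.fuse_att = FuseBaseSelfAttention()
        self.classifier = nn.Sequential(
            nn.Linear(768, 64), nn.ReLU(), nn.Dropout(0.1), nn.Linear(64, 8)
        )

    def forward(self, img_feat, text_feat):
        # img_feat: (B, N, 1280) image region features; text_feat: (B, T, 512) token features
        img = self.img_proj(img_feat)          # (B, N, 128)
        txt, _ = self.text_rnn(text_feat)      # (B, T, 128)
        seq = torch.cat([img, txt], dim=1)     # (B, N + T, 128); concatenation is an assumption
        return self.classifier(self.fuse_att(seq))  # (B, 8) class logits


# Quick sanity check with illustrative sequence lengths:
model = ImageTextClassifier()
logits = model(torch.randn(2, 49, 1280), torch.randn(2, 20, 512))
print(logits.shape)  # torch.Size([2, 8])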


1. img_proj: image-feature projection (dimensionality reduction) module

Sequential(
    (0): Linear(in_features=1280, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=128, out_features=128, bias=True)
)

This branch projects the 1280-dimensional image backbone features down to 128 dimensions, matching the 128-dimensional hidden size of the text GRU.
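As a quick shape check (the batch size and the use of a single pooled embedding per image are illustrative assumptions, not from the original code):

import torch
import torch.nn as nn

img_proj = nn.Sequential(
    nn.Linear(1280, 128), nn.ReLU(), nn.Dropout(0.1), nn.Linear(128, 128)
)
x = torch.randn(4, 1280)       # 4 image embeddings of size 1280 from the backbone
print(img_proj(x).shape)       # torch.Size([4, 128])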
Analyze this code:

# numpy and torch imports added for completeness; Retriever and how_select_local
# are assumed to come from the surrounding retrieval code (see retrieval/model.py).
import numpy as np
import torch


class RetrievalDatabase(Retriever):
    def __init__(self, modelname, backbone=None, device="cuda"):
        super().__init__(modelname, backbone, device)

        self.ivf_builder = self.asmk.create_ivf_builder()

        self.kf_counter = 0
        self.kf_ids = []

        self.query_dtype = torch.float32
        self.query_device = device
        self.centroids = torch.from_numpy(self.asmk.codebook.centroids).to(
            device=self.query_device, dtype=self.query_dtype
        )

    # Mirrors forward_local in extract_local_features from retrieval/model.py
    def prep_features(self, backbone_feat):
        retrieval_model = self.model

        # extract_features_and_attention without the encoding!
        backbone_feat_prewhitened = retrieval_model.prewhiten(backbone_feat)
        proj_feat = retrieval_model.projector(backbone_feat_prewhitened) + (
            0.0 if not retrieval_model.residual else backbone_feat_prewhitened
        )
        attention = retrieval_model.attention(proj_feat)
        proj_feat_whitened = retrieval_model.postwhiten(proj_feat)

        # how_select_local in
        topk_features, _, _ = how_select_local(
            proj_feat_whitened, attention, retrieval_model.nfeat
        )

        return topk_features

    def update(self, frame, add_after_query, k, min_thresh=0.0):
        feat = self.prep_features(frame.feat)
        id = self.kf_counter  # Using our own counter since anything else messes up the IVF

        feat_np = feat[0].cpu().numpy()  # Assumes one frame at a time!
        id_np = id * np.ones(feat_np.shape[0], dtype=np.int64)

        database_size = self.ivf_builder.ivf.n_images
        # print("Database size: ", database_size, self.kf_counter)

        # Only query if the database already holds at least one image
        topk_image_inds = []
        topk_codes = None  # Change this if actually querying
        if self.kf_counter > 0:
            ranks, ranked_scores, topk_codes = self.query(feat_np, id_np)
            scores = np.empty_like(ranked_scores)
            scores[np.arange(ranked_scores.shape[0])[:, None], ranks] = ranked_scores
            scores = torch.from_numpy(scores)[0]

            topk_images = torch.topk(scores, min(k, database_size))
            valid = topk_images.values > min_thresh
            topk_image_inds = topk_images.indices[valid]
            topk_image_inds = topk_image_inds.tolist()

        if add_after_query:
            self.add_to_database(feat_np, id_np, topk_codes)

        return topk_image_inds

    # We need this function because the kernel and inverted file are not defined
    # when manually updating ivf_builder
    def query(self, feat, id):
        step_params = self.asmk.params.get("query_ivf")

        images2, ranks, scores, topk = self.accumulate_scores(
            self.asmk.codebook,
            self.ivf_builder.kernel,
            self.ivf_builder.ivf,
            feat,
            id,
            params=step_params,
        )

        return ranks, scores, topk
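A hypothetical usage sketch for the class above, not part of the original code: the retrieval model name, the frame object, and the keyframe loop are placeholders, and the original may advance kf_counter inside add_to_database rather than in the caller.

from types import SimpleNamespace

db = RetrievalDatabase(modelname="asmk_retrieval_model")    # placeholder model name
for backbone_feat in keyframe_features:                     # assumed iterable of (1, N, D) feature tensors
    frame = SimpleNamespace(feat=backbone_feat)             # stand-in for the real frame type
    matches = db.update(frame, add_after_query=True, k=5, min_thresh=0.1)
    db.kf_counter += 1                                      # assumption: counter advanced by the caller
    print(matches)                                          # indices of previously seen keyframes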