AlphaFold3 data_transforms 模块的 make_msa_feat
函数的作用是为多序列比对(MSA)数据创建和拼接特征,生成 msa_feat
和 target_feat 特征,
这些特征将被用于后续的模型输入。
源代码:
@curry1
def make_msa_feat(protein):
"""Create and concatenate MSA features."""
# Whether there is a domain break. Always zero for chains, but keeping for
# compatibility with domain datasets.
has_break = torch.clip(
protein["between_segment_residues"].to(torch.float32), 0, 1
)
aatype_1hot = make_one_hot(protein["aatype"], 21)
target_feat = [
torch.unsqueeze(has_break, dim=-1),
aatype_1hot, # Everyone gets the original sequence.
]
msa_1hot = make_one_hot(protein["msa"], 23)
has_deletion = torch.clip(protein["deletion_matrix"], 0.0, 1.0)
deletion_value = torch.atan(protein["deletion_matrix"] / 3.0) * (
2.0 / np.pi
)
msa_feat = [
msa_1hot,
torch.unsqueeze(has_deletion, dim=-1),
torch.unsqueeze(deletion_value, dim=-1),
]
if "cluster_profile" in protein:
deletion_mean_value = torch.atan(
protein["cluster_deletion_mean"] / 3.0
) * (2.0 / np.pi)
msa_feat.extend(
[
protein["cluster_profile"],
torch.unsqueeze(deletion_mean_value, dim=-1),
]
)
if "extra_deletion_matrix" in protein:
protein["extra_has_deletion"] = torch.clip(
protein["extra_deletion_matrix"], 0.0, 1.0
)
protein["extra_deletion_value"] = torch.atan(
protein["extra_