ckpt转bin模型报错解决：AttributeError: 'BertForPreTraining' object has no attribute 'shape' #393

clearlove100

已于 2022-07-05 09:58:15 修改

阅读量959

点赞数

分类专栏： nlp 文章标签： nlp python

于 2022-07-05 09:49:46 首次发布

本文链接：https://blog.csdn.net/clearlove100/article/details/125613581

版权

nlp 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

首先修改modeling_bert.py中的load_tf_weights_in_bert为：

def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
    """Copy the variables of a TensorFlow BERT checkpoint into a PyTorch model.

    Each checkpoint variable name is split on "/" and resolved, component by
    component, to an attribute path on ``model``. Optimizer state and training
    bookkeeping variables are ignored, as are variables whose path does not
    exist on ``model`` or does not end on a tensor.

    Args:
        model: PyTorch module that receives the weights (modified in place).
        config: Model configuration. Unused here; kept so existing callers
            keep working.
        tf_checkpoint_path: Path to the TensorFlow checkpoint.

    Returns:
        The same ``model`` object, with matching parameters overwritten.

    Raises:
        ImportError: If ``re``, ``numpy`` or ``tensorflow`` cannot be imported.
        ValueError: If a resolved parameter and its checkpoint array differ in
            shape.
    """
    try:
        import re
        import numpy as np
        import tensorflow as tf
    except ImportError:
        logger.error(
            "Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see "
            "https://www.tensorflow.org/install/ for installation instructions."
        )
        raise

    # Path components that identify optimizer slots or training bookkeeping;
    # none of them are needed to use the pretrained model.
    skipped_components = {
        "adam_v",
        "adam_m",
        "global_step",
        "bad_steps",
        "good_steps",
        "loss_scale",
        "AdamWeightDecayOptimizer",
        "AdamWeightDecayOptimizer_1",
        "save_counter",
        ".OPTIMIZER_SLOT",
    }
    # TensorFlow scope name -> attribute name on the PyTorch side.
    attribute_aliases = {
        "kernel": "weight",
        "gamma": "weight",
        "output_weights": "weight",
        "output_bias": "bias",
        "beta": "bias",
        "squad": "classifier",
        "dense_output": "output",
        "bert_output": "output",
        "self_attention": "self",
    }
    # Matches indexed scopes such as "layer_3"; compiled once outside the loops.
    indexed_scope = re.compile(r"[A-Za-z]+_\d+")

    tf_path = os.path.abspath(tf_checkpoint_path)
    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
    # Load weights from TF model
    names = []
    arrays = []
    for name, shape in tf.train.list_variables(tf_path):
        logger.info(f"Loading TF weight {name} with shape {shape}")
        names.append(name)
        arrays.append(tf.train.load_variable(tf_path, name))

    for full_name, array in zip(names, arrays):
        name = full_name.split("/")
        # The membership test must be evaluated per component; testing the
        # truthiness of the list itself would skip every single variable.
        if name[0] == "optimizer" or any(n in skipped_components for n in name):
            logger.info(f"Skipping {full_name}")
            continue
        # Drop the ".ATTRIBUTES/..." suffix so only the module path remains.
        if ".ATTRIBUTES" in name:
            name = name[: name.index(".ATTRIBUTES")]
        if not name:
            logger.info(f"Skipping {full_name}")
            continue

        pointer = model
        resolved = True
        for m_name in name:
            if indexed_scope.fullmatch(m_name):
                scope_names = re.split(r"_(\d+)", m_name)
            else:
                scope_names = [m_name]
            alias = attribute_aliases.get(scope_names[0])
            if alias is not None:
                pointer = getattr(pointer, alias)
            else:
                try:
                    pointer = getattr(pointer, scope_names[0])
                except AttributeError:
                    # Abandon the whole variable: carrying on with a stale
                    # pointer would write the array onto the wrong object.
                    resolved = False
                    break
            if len(scope_names) >= 2:
                pointer = pointer[int(scope_names[1])]
        if not resolved:
            logger.info(f"Skipping {full_name}")
            continue

        if m_name.endswith("_embeddings"):
            pointer = getattr(pointer, "weight")
        elif m_name == "kernel":
            # Only "kernel" arrays are transposed; "output_weights" is assumed
            # to already be in (out_features, in_features) order, and the shape
            # check below rejects the variable if that does not hold.
            array = np.transpose(array)

        # A path that ends on a module instead of a parameter has no shape and
        # cannot receive data; skip it rather than fail on ``pointer.shape``.
        if not isinstance(pointer, torch.Tensor):
            logger.info(f"Skipping {full_name}")
            continue
        if pointer.shape != array.shape:
            raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
        logger.info(f"Initialize PyTorch weight {name}")
        pointer.data = torch.from_numpy(array)
    return model

tf模型转为torch模型代码，如下：

import argparse
import os
import torch

from transformers import BertConfig, BertForPreTraining, load_tf_weights_in_bert
from transformers.utils import logging

# Show info-level log messages (e.g. per-weight progress) during conversion.
logging.set_verbosity_info()


def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    """Convert a TensorFlow BERT checkpoint and save it as a PyTorch state dict.

    Args:
        tf_checkpoint_path: Path to the TensorFlow checkpoint.
        bert_config_file: JSON file describing the BERT architecture.
        pytorch_dump_path: Output directory. The state dict is written to a
            file named ``0`` inside it.
    """
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print(f"Building PyTorch model from configuration: {config}")
    model = BertForPreTraining(config)
    # Load weights from tf checkpoint
    load_tf_weights_in_bert(model, config, tf_checkpoint_path)
    # Save pytorch-model. exist_ok=True lets the script be re-run against an
    # existing output directory instead of failing after the conversion work.
    os.makedirs(pytorch_dump_path, exist_ok=True)
    pytorch_dump_path = os.path.join(pytorch_dump_path, '0')
    print(f"Save PyTorch model to {pytorch_dump_path}")
    torch.save(model.state_dict(), pytorch_dump_path)
if __name__ == "__main__":
    # Command-line entry point. All three options are required strings, so
    # they are declared as (flag, help text) pairs and registered in one loop.
    cli_options = (
        ("--tf_checkpoint_path", "Path to the TensorFlow checkpoint path."),
        (
            "--bert_config_file",
            "The config json file corresponding to the pre-trained BERT model. \n"
            "This specifies the model architecture.",
        ),
        ("--pytorch_dump_path", "Path to the output PyTorch model."),
    )
    parser = argparse.ArgumentParser()
    for flag, description in cli_options:
        parser.add_argument(flag, default=None, type=str, required=True, help=description)
    args = parser.parse_args()
    convert_tf_checkpoint_to_pytorch(
        tf_checkpoint_path=args.tf_checkpoint_path,
        bert_config_file=args.bert_config_file,
        pytorch_dump_path=args.pytorch_dump_path,
    )