from __future__ import absolute_import, division, print_function
import logging
import numpy as np
import os
import random
import sys
import time
import torch
from argparse import Namespace
from torch. utils. data import ( DataLoader, RandomSampler, SequentialSampler,
TensorDataset)
from tqdm import tqdm
from transformers import ( BertConfig, BertForSequenceClassification, BertTokenizer, )
from transformers import glue_compute_metrics as compute_metrics
from transformers import glue_output_modes as output_modes
from transformers import glue_processors as processors
from transformers import glue_convert_examples_to_features as convert_examples_to_features
logger = logging. getLogger( __name__)
logging. basicConfig( format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s' ,
datefmt = '%m/%d/%Y %H:%M:%S' ,
level = logging. WARN)
logging. getLogger( "transformers.modeling_utils" ) . setLevel(
logging. WARN)
print ( torch. __version__)
1.5.0
!python download_glue_data. py - - data_dir= 'glue_data' - - tasks= 'MRPC'
Processing MRPC...
Local MRPC data not specified, downloading data from https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphrase_train.txt
Completed!
torch. set_num_threads( 1 )
print ( torch. __config__. parallel_info( ) )
ATen/Parallel:
at::get_num_threads() : 1
at::get_num_interop_threads() : 3
OpenMP 201511 (a.k.a. OpenMP 4.5)
omp_get_max_threads() : 1
Intel(R) Math Kernel Library Version 2020.0.1 Product Build 20200208 for Intel(R) 64 architecture applications
mkl_get_max_threads() : 1
Intel(R) MKL-DNN v0.21.1 (Git Hash 7d2fd500bc78936d1d648ca713b901012f470dbc)
std::thread::hardware_concurrency() : 6
Environment variables:
OMP_NUM_THREADS : [not set]
MKL_NUM_THREADS : [not set]
ATen parallel backend: OpenMP
configs = Namespace( )
configs. output_dir = "./MRPC/"
configs. data_dir = "./glue_data/MRPC"
configs. model_name_or_path = "bert-base-uncased"
configs. max_seq_length = 128
configs. task_name = "MRPC" . lower( )
configs. processor = processors[ configs. task_name] ( )
configs. output_mode = output_modes[ configs. task_name]
configs. label_list = configs. processor. get_labels( )
configs. model_type = "bert" . lower( )
configs. do_lower_case = True
configs. device = "cpu"
configs. per_gpu_eval_batch_size = 8
configs. n_gpu = 0
configs. local_rank = - 1
configs. overwrite_cache = False
def set_seed ( seed) :
random. seed( seed)
np. random. seed( seed)
torch. manual_seed( seed)
set_seed( 42 )
tokenizer = BertTokenizer. from_pretrained(
configs. output_dir, do_lower_case= configs. do_lower_case)
model = BertForSequenceClassification. from_pretrained( configs. output_dir)
model. to( configs. device)
/home/qy/.conda/envs/pytorch/lib/python3.6/site-packages/transformers-2.10.0-py3.6.egg/transformers/tokenization_utils.py:831: FutureWarning: Parameter max_len is deprecated and will be removed in a future release. Use model_max_length instead.
category=FutureWarning,
BertForSequenceClassification(
(bert): BertModel(
(embeddings): BertEmbeddings(
(word_embeddings): Embedding(30522, 768, padding_idx=0)
(position_embeddings): Embedding(512, 768)
(token_type_embeddings): Embedding(2, 768)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(encoder): BertEncoder(
(layer): ModuleList(
(0): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(1): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(2): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(3): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=True)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(output): BertSelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(intermediate): BertIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
)
(output): BertOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
(4): BertLayer(
(attention): BertAttention(
(self): BertSelfAttention(
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=