# Simple text classification using a pretrained Transformers model.
import os
import logging
import numpy as np
import transformers
from datasets import Dataset
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import Trainer, TrainingArguments, BertTokenizer, BertForSequenceClassification
# Module-level setup, executed once at import time.

# Seed the random number generators handled by transformers.set_seed with the
# fixed value 1.
transformers.set_seed(1)
# Expose only the CUDA device with index 1 to this process.
# NOTE(review): this is set after `transformers` has already been imported;
# presumably CUDA has not been initialized at this point, so the setting still
# takes effect -- confirm on the target environment.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# Configure the root logger so that INFO-level (and more severe) records are emitted.
logging.basicConfig(level=logging.INFO)
class TextClassification():
def data_show(self, data_file):
with open(data_file, 'r', encoding='utf-8') as f:
data = f.readlines()
logging.info("获取数据:%s" % len(data))
tags_data_dict = {
}
for line in data:
text_label = line.strip().split('\t')
if text_label[1] in tags_data_dict:
tags_data_dict[text_label[1]].append(text_label[0])
else:
tags_data_dict[text_label[1]] =