import os
import sys
import pickle
import logging
# Configure the root logger at import time: records at INFO and above are
# written to stdout, each prefixed with '>>>' and a second-resolution timestamp.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='>>> %(asctime)s %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    # force=True,  # kept disabled as in the original; per the logging docs,
    #              # basicConfig is a no-op when the root logger already has
    #              # handlers unless force=True is passed (Python 3.8+).
)
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
def load_txt(path, mode):
    """Load a UTF-8 text dataset that stores one sample per line.

    Args:
        path: Path of the text file to read.
        mode: ``'tr'`` for a labelled file whose lines have the form
            ``text<TAB>label``; ``'te'`` for an unlabelled file holding one
            text per line.

    Returns:
        For ``'tr'``: a ``(texts, labels)`` tuple of two parallel lists, with
        every label converted via ``int``. Lines that do not split into
        exactly two tab-separated fields are skipped.
        For ``'te'``: a list with every line of the file, stripped of
        surrounding whitespace (blank lines are kept as empty strings).

    Raises:
        KeyError: If ``mode`` is neither ``'tr'`` nor ``'te'``.
        ValueError: If a label in a ``'tr'`` file cannot be parsed by ``int``.
        OSError: If the file cannot be opened.
    """
    # TODO (carried over from the original docstring): from random import shuffle

    # Validate the mode before touching the file system, so a wrong mode is
    # reported as such instead of being masked by an I/O error or deferred
    # until the whole file has been read.
    if mode not in ('tr', 'te'):
        raise KeyError(
            "unknown mode {!r}; expected 'tr' or 'te'".format(mode)
        )

    with open(path, mode='rt', encoding='utf-8') as f:
        if mode == 'te':
            return [line.strip() for line in f]

        xl, yl = [], []
        for line in f:
            parts = line.strip().split('\t')
            # Only well-formed "text<TAB>label" lines are used; anything else
            # (blank lines, missing label, extra tabs) is silently ignored.
            if len(parts) != 2:
                continue
            string, lbl = parts
            xl.append(string)
            yl.append(int(lbl))
        return xl, yl
def load_data():
logging.info
中文新闻标题分类
于 2022-04-26 20:53:12 首次发布
该博客介绍了如何在华为云ModelArts平台上,利用PyTorch和Transformers库实现BERT模型对中文新闻标题的分类。实验环境为PyTorch 1.8、CUDA 10.2、cuDNN 7,配备了1块32GB的V100 GPU和8核64GB的CPU。
摘要由CSDN通过智能技术生成