NLP Transfer Learning: Building a Hotel Review Prediction Model

Dataset: a corpus of hotel reviews.
Goal: use a pretrained model to turn the input text into feature representations, then feed them into a custom network that outputs the prediction.

Code

import torch
import torch.nn as nn
from tensorflow.keras.preprocessing import sequence
import pandas as pd
from sklearn.utils import shuffle
from functools import reduce

# Load a pretrained model directly and use it to produce feature representations of the input text

source='huggingface/pytorch-transformers'
model_name='bert-base-chinese'
model=torch.hub.load(source,'model',model_name)
tokenizer=torch.hub.load(source,'tokenizer',model_name)
cutlen=32
def get_bert_encode(text):
	# Tokenize the first cutlen characters and strip the [CLS]/[SEP] ids added by encode()
	indexed_tensors=tokenizer.encode(text[:cutlen])[1:-1]
	# Pad to a fixed length of cutlen
	indexed_tensors=sequence.pad_sequences([indexed_tensors],cutlen)
	token_tensor=torch.LongTensor(indexed_tensors)
	# BERT is only used as a frozen feature extractor, so no gradients are needed
	with torch.no_grad():
		encoder_layers,_=model(token_tensor)
	# Drop the batch dimension -> [cutlen, 768]
	encoder_layers=encoder_layers[0]
	return encoder_layers
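# Optional sanity check (an added sketch, not part of the original post): assuming the
# hub model returns the sequence output first, get_bert_encode should yield one
# 768-dim vector per padded position, i.e. a tensor of shape [cutlen, 768].
if __name__ == '__main__':
	sample_encoding = get_bert_encode('酒店位置很好')  # any short review works here
	print(sample_encoding.shape)  # expected: torch.Size([32, 768])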

# Define the model: a single linear classification head on top of the BERT features
class Net_model(nn.Module):
	def __init__(self,char_size=32,embedding_size=768):
		super().__init__()
		self.char_size=char_size
		self.embedding_size=embedding_size
		# Map the flattened char_size*embedding_size features to 2 classes
		self.fc1=nn.Linear(self.char_size*self.embedding_size,2)

	def forward(self,x):
		# Reshape the BERT encodings into one flat feature vector per review
		x=x.view(-1,self.char_size*self.embedding_size)
		x=self.fc1(x)
		return x
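# Optional shape check (an added sketch, not in the original post): a pretend batch of
# 4 encoded reviews, stacked the same way the data loader below stacks them
# ([batch*char_size, embedding_size]), should come out as 4 pairs of class logits.
if __name__ == '__main__':
	dummy = torch.randn(4 * 32, 768)   # stands in for 4 reviews encoded by BERT
	print(Net_model()(dummy).shape)    # expected: torch.Size([4, 2])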

# Build batch generators for the training and validation data
def data_loader(train_data_path,valid_data_path,batch_size):
	# Read the tab-separated files and drop the header row
	train_data=pd.read_csv(train_data_path,header=None,sep='\t').drop([0])
	valid_data=pd.read_csv(valid_data_path,header=None,sep='\t').drop([0])
	if len(valid_data)<batch_size:
		raise ValueError("batch_size is larger than the validation set")

	def __loader_generator(data):
		for batch in range(0,len(data),batch_size):
			batch_encoder=[]
			batch_labels=[]
			# shuffle() returns a freshly shuffled copy, so each batch is a random slice of the data
			for item in shuffle(data.values.tolist())[batch:batch+batch_size]:
				encoders=get_bert_encode(item[0])
				batch_encoder.append(encoders)
				batch_labels.append([int(item[1])])
			# Concatenate the per-sample [cutlen, 768] encodings along dim 0 and flatten the labels
			encoder=reduce(lambda x,y:torch.cat((x,y),dim=0),batch_encoder)
			label=torch.tensor(reduce(lambda x,y:x+y,batch_labels))
			# The yield must sit inside the loop so that every batch is produced, not just the last one
			yield (encoder,label)

	return __loader_generator(train_data),__loader_generator(valid_data),len(train_data),len(valid_data)
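# Expected input format (an assumption based on how the loader indexes columns and
# drops row 0): each .tsv file has a header row followed by "review<TAB>label" lines,
# with label 1 for positive and 0 for negative. A minimal usage sketch, reusing the
# paths from the training block below:
if __name__ == '__main__':
	train_gen, _, _, _ = data_loader(
		'/root/data/cn_data/SST-2/train.tsv',
		'/root/data/cn_data/SST-2/dev.tsv',
		8)
	encoder, label = next(train_gen)
	# 8 reviews, each encoded to [32, 768] and concatenated along dim 0
	print(encoder.shape, label.shape)  # expected: torch.Size([256, 768]) torch.Size([8])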

# Define the training function
def train(train_data):
	total_loss=0
	total_acc=0
	for encoder,label in train_data:
		optimizer.zero_grad()
		result_output=net(encoder)
		loss=criterion(result_output,label)
		loss.backward()
		# Accumulate the scalar loss value rather than the tensor, so the graph is not retained
		total_loss+=loss.item()
		optimizer.step()
		total_acc+=(result_output.argmax(1)==label).sum().item()

	return total_loss,total_acc

# Define the validation function
def valid(valid_data):
	total_valid_loss=0
	total_valid_acc=0
	for valid_datas,label in valid_data:
		with torch.no_grad():
			# Run the current batch (valid_datas), not the generator itself, through the network
			result_output=net(valid_datas)
			loss=criterion(result_output,label)
			total_valid_loss+=loss.item()
			total_valid_acc+=(result_output.argmax(1)==label).sum().item()

	return total_valid_loss,total_valid_acc


if __name__=='__main__':
	train_data_path='/root/data/cn_data/SST-2/train.tsv'
	valid_data_path='/root/data/cn_data/SST-2/dev.tsv'
	net=Net_model()
	epochs=6
	batch_size=32
	criterion=nn.CrossEntropyLoss()
	optimizer=torch.optim.SGD(net.parameters(),lr=0.05,momentum=0.9)
	for epoch in range(epochs):
		train_data,valid_data,train_len,valid_len=data_loader(train_data_path,valid_data_path,batch_size)

		train_loss,train_acc=train(train_data)
		valid_loss,valid_acc=valid(valid_data)

		# Approximate per-sample averages: the totals are sums of per-batch mean losses
		train_average_loss = train_loss * batch_size / train_len
		valid_average_loss = valid_loss * batch_size / valid_len

		train_average_acc = train_acc / train_len
		valid_average_acc = valid_acc / valid_len
		print("Train Loss:", train_average_loss, "|", "Train Acc:", train_average_acc)
		print("Valid Loss:", valid_average_loss, "|", "Valid Acc:", valid_average_acc)


	# Save the trained classifier weights
	torch.save(net.state_dict(),'./net_model_model.pkl')
	

# Model prediction
if __name__=='__main__':
	text='酒店环境不错,设备也不错'
	# Load the saved model weights
	net.load_state_dict(torch.load('./net_model_model.pkl'))
	print("Input text:", text)
	with torch.no_grad():
		predict=net(get_bert_encode(text))
		print("Predicted label:", torch.argmax(predict).item())

Training and validation loss and accuracy (figure omitted)
Prediction result

Input text: 酒店环境不错,设备也不错
Predicted label: 1
