【人工智能】使用先前训练好的BERT模型进行情感预测的Python程序

阿寻寻

于 2024-08-17 03:15:00 发布

阅读量230

点赞数 4

文章标签：人工智能 bert python

本文链接：https://blog.csdn.net/weixin_46453070/article/details/141188089

版权

下面是一个使用先前训练好的BERT模型进行情感预测的Python程序。程序会读取指定文件夹中所有以 `-comments.json` 结尾的JSON文件（文件名中该后缀之前的部分作为课程名），对每个文件中各条评论的 `content` 字段进行情感分析，最后输出每个课程的正面情感比例。注意：代码假设模型输出的标签0表示正面情感，请根据训练时的标签定义进行调整。

import os
import json
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from datasets import Dataset
import numpy as np

# Select the compute device: CUDA when it is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Load the previously saved model and its tokenizer from the same directory,
# then move the model onto the selected device.
model_save_path = './final_saved_model'
model = BertForSequenceClassification.from_pretrained(model_save_path)
tokenizer = BertTokenizer.from_pretrained(model_save_path)
model.to(device)

# Folder that holds the comment JSON files; replace with your own path.
folder_path = './comments_folder'

# Prediction helper
def predict_sentiment(contents):
    """Predict a class id for every text in ``contents``.

    The texts are tokenized with the module-level ``tokenizer`` (truncated
    and padded to 128 tokens), fed through the module-level ``model`` in
    batches of 16 on ``device`` with gradients disabled, and the arg-max
    index of each row of logits is collected.

    Args:
        contents: Iterable of text strings to classify.

    Returns:
        A list of plain ``int`` class ids, one per input text, in input
        order. An empty input yields an empty list. What each class id
        means depends on how the model was trained.
    """
    contents = list(contents)
    if not contents:
        # An empty Dataset gains no 'input_ids'/'attention_mask' columns from
        # map(), so the set_format() call below would fail. There is nothing
        # to predict anyway, so return early.
        return []

    dataset = Dataset.from_dict({'text': contents})

    def tokenize_function(examples):
        # Fixed-length padding keeps every batch the same shape.
        return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

    dataset = dataset.map(tokenize_function, batched=True)
    dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])

    loader = torch.utils.data.DataLoader(dataset, batch_size=16)

    # Inference mode: disables dropout and similar train-only behaviour.
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in loader:
            logits = model(
                batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device),
            ).logits
            # tolist() copies to host memory and yields built-in ints.
            predictions.extend(torch.argmax(logits, dim=-1).tolist())

    return predictions

# Per-course positive-sentiment ratio
def calculate_positive_ratio(json_files, *, folder=None, positive_label=0):
    """Compute the fraction of positively-classified comments per file.

    Each file is expected to contain a JSON array of objects carrying a
    string ``content`` field. Entries that are not objects, or whose
    ``content`` is missing or not a string, are skipped rather than raising.

    Args:
        json_files: Iterable of file names located inside ``folder``.
        folder: Directory containing the files. Defaults to the
            module-level ``folder_path``.
        positive_label: Class id counted as positive. Defaults to 0, which
            is an assumption about the model's label mapping.

    Returns:
        A dict mapping course name (the file name without a trailing
        ``-comments.json``) to the positive ratio. Files with no usable
        comments map to 0.
    """
    suffix = '-comments.json'
    results = {}
    for file_name in json_files:
        # Strip the suffix only at the end of the name; str.replace would
        # also remove an occurrence in the middle.
        if file_name.endswith(suffix):
            course_name = file_name[:-len(suffix)]
        else:
            course_name = file_name

        directory = folder_path if folder is None else folder
        with open(os.path.join(directory, file_name), 'r', encoding='utf-8') as file:
            comments = json.load(file)
        # The file is closed before inference starts.

        contents = [
            comment['content']
            for comment in comments
            if isinstance(comment, dict) and isinstance(comment.get('content'), str)
        ]

        # Skip the model call entirely when there is nothing to classify.
        predictions = predict_sentiment(contents) if contents else []
        positive_count = sum(1 for pred in predictions if pred == positive_label)
        total_count = len(predictions)
        results[course_name] = positive_count / total_count if total_count > 0 else 0
    return results

# Collect every comment file in the folder. os.listdir returns names in
# arbitrary order, so sort them to make the report order reproducible.
json_files = sorted(f for f in os.listdir(folder_path) if f.endswith('-comments.json'))

# Compute the positive-sentiment ratio for each course.
positive_ratios = calculate_positive_ratio(json_files)

# Print one line per course with the ratio rounded to two decimals.
for course_name, ratio in positive_ratios.items():
    print(f"课程: {course_name}, 正面情感比例: {ratio:.2f}")