Python虚假新闻检测识别

最新推荐文章于 2025-05-11 08:42:46 发布

荷塘月色2

最新推荐文章于 2025-05-11 08:42:46 发布

阅读量1k

点赞数 24

文章标签： python 开发语言 人工智能

本文链接：https://blog.csdn.net/alicema1111/article/details/147670943

版权

程序示例精选
Python虚假新闻检测识别
如需安装运行环境或远程调试，见文章底部个人QQ名片，由专业技术人员远程协助！

前言

本文针对《Python虚假新闻检测识别》编写了示例代码，代码整洁、规范、易读，是学习与应用的推荐首选。

文章目录

一、所需工具软件
二、使用步骤
1. 主要代码
2. 运行结果
三、在线协助

一、所需工具软件

1. Python
2. Pycharm

二、使用步骤

代码如下（示例）：


# -*- coding: utf-8 -*-

import jieba
import joblib
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
def preprocess_text(text, vectorizer, svd, lda, stop_word):
    """Turn one raw news text into the feature row fed to the classifier.

    Steps:
    1. Segment the text with jieba and drop stop words.
    2. Vectorize the space-joined tokens with the fitted TF-IDF vectorizer.
    3. Reduce dimensionality with the fitted SVD model, then the fitted
       LDA model.

    Args:
        text: Raw news text to classify.
        vectorizer: Fitted vectorizer exposing ``transform`` on a list of
            whitespace-separated token strings.
        svd: Fitted reducer exposing ``transform``.
        lda: Fitted reducer exposing ``transform``.
        stop_word: Iterable of stop-word strings to remove.

    Returns:
        The output of ``lda.transform`` for the single input document.
    """
    # Build the lookup set once so each token test is O(1) instead of a
    # linear scan of the stop-word list.
    stop_set = set(stop_word)

    # Chinese word segmentation followed by stop-word removal.
    kept_words = [word for word in jieba.cut(text) if word not in stop_set]
    s = ' '.join(kept_words)

    # The vectorizer expects an iterable of documents, hence the 1-item list.
    tfidf_features = vectorizer.transform([s])

    # NOTE(review): the original block stopped before this point and returned
    # None. Chaining SVD -> LDA is assumed from the docstring; confirm it
    # matches the order used when the models were trained.
    svd_features = svd.transform(tfidf_features)
    return lda.transform(svd_features)
    
def load_stopwords(filepath):
    """Read a stop-word file and return its lines as a list of strings.

    The file is decoded as UTF-8; bytes that cannot be decoded are dropped
    (``errors='ignore'``). Line terminators are not included in the result.
    """
    with open(filepath, mode='r', encoding='utf-8', errors='ignore') as handle:
        raw_text = handle.read()
    return raw_text.splitlines()

def main():
    """Classify one hard-coded news headline and print the verdict.

    Loads the stop-word list and the persisted vectorizer / SVD / LDA /
    classifier artifacts from the current working directory, builds the
    features for a sample headline, squashes the classifier's decision score
    through a sigmoid, and prints a label together with that score.
    """
    # Load the stop-word list.
    stop_word = load_stopwords('stop_word.txt')

    # Load the TF-IDF vectorizer, SVD and LDA models saved at training time.
    # NOTE: joblib.load unpickles its input; only load files you trust.
    vectorizer = joblib.load('tfidf_vectorizer.joblib')
    svd = joblib.load('svd_model.joblib')
    lda = joblib.load('lda_model.joblib')

    # Load the trained classifier (a RidgeClassifier, per the file name).
    clf = joblib.load('ridge_classifier_model.joblib')

    # Sample input. Alternative example labelled as real news:
    # news_text = "她用一招，吓退了企图不轨的滴滴司机"
    news_text = "刘翔用1秒钟跑完100米"  # labelled as fake news

    # Confidence threshold (tune as needed).
    # NOTE(review): with a threshold below 0.5 the first two branches below
    # cover every possible value, so the "unknown" branch is unreachable.
    # A value such as 0.7 would create a real uncertainty band; left as-is to
    # preserve the current output.
    confidence_threshold = 0.3

    # Build the feature row for the input text.
    features = preprocess_text(news_text, vectorizer, svd, lda, stop_word)

    # Signed distance(s) from the decision boundary for the single sample.
    decision_scores = clf.decision_function(features)

    # Map the score to (0, 1) with a sigmoid. This is a monotonic squashing,
    # not a calibrated probability.
    probabilities = 1 / (1 + np.exp(-decision_scores))

    # The original code read prob_class_1 without ever assigning it.
    # Assumes a binary classifier, where decision_function returns one score
    # per sample for the positive class - TODO confirm.
    prob_class_1 = probabilities[0]

    # Decide the final label from the thresholded score.
    if prob_class_1 >= confidence_threshold:
        final_prediction = 1
        print(f"预测类别：{final_prediction} 假新闻（高置信度）")
    elif prob_class_1 <= (1 - confidence_threshold):
        final_prediction = 0
        print(f"预测类别：{final_prediction} 真新闻（高置信度）")
    else:
        final_prediction = "未知"
        print("预测类别：未知 （置信度过低）")

    # Report the sigmoid score for class 1.
    print(f"置信度（属于类别 1 的概率）：{prob_class_1:.4f}")

# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()