Python 文本内容情感倾向分析

LLL666@

于 2024-05-07 11:12:35 发布

阅读量137

点赞数 1

文章标签： python 开发语言

本文链接：https://blog.csdn.net/LLL666gzh/article/details/138524247

版权

#本文的代码是根据B站UP主“你好我是大河”的视频教程编写#

P1文本情感倾向分析

import json
import requests

# Request body for the sentiment endpoint: a JSON object carrying the text
# to analyse.
content_txt = json.dumps({
    "text": '一键三连的同学，都是好同学！'
})

# The body is JSON, so declare it as such.
header = {
    'Content-Type': 'application/json'
}

# Exchange the application's credentials for an access token.  The masked
# client_id / client_secret values must be replaced with real credentials.
host = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=*****&client_secret=******"
# A timeout keeps the script from hanging forever on an unreachable service.
response = requests.get(host, timeout=10)
token_reply = response.json()
if 'access_token' not in token_reply:
    # Surface the service's own reply instead of an opaque KeyError.
    raise RuntimeError('failed to obtain access_token: {}'.format(token_reply))
mytoken = token_reply['access_token']

# Sentiment endpoint plus the query parameters it is called with.
url1 = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify'
myurl = url1 + '?charset=UTF-8&access_token=' + mytoken

results = requests.post(url=myurl, headers=header, data=content_txt, timeout=10).json()

# Dump every top-level field of the reply as "key : value".
for item, value in results.items():
    print(item, ":", value)

P2情感倾向分析批量处理

import json
import requests

def process_bar(percent):
    """Print a ten-slot text bar visualising a sentiment score.

    ``percent`` is multiplied by 10 and truncated with ``int`` to get the
    number of "开心" slots; the remaining slots (up to ten) are "伤心".
    """
    happy_slots = int(percent * 10)
    segments = [
        '感情正向->',
        '开心' * happy_slots,
        '伤心' * (10 - happy_slots),
        '<-感情负向',
    ]
    print(''.join(segments))

# Obtain an access token.  The masked client_id / client_secret values in the
# URL must be replaced with real credentials.
host = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=******&client_secret=******"
# A timeout keeps the script from hanging forever on an unreachable service.
response = requests.get(host, timeout=10)
token_reply = response.json()
if 'access_token' not in token_reply:
    # Surface the service's own reply instead of an opaque KeyError.
    raise RuntimeError('failed to obtain access_token: {}'.format(token_reply))
mytoken = token_reply['access_token']

def txt_mark(mystr, mytoken):
    """Send one text to the sentiment_classify endpoint and print the result.

    Args:
        mystr: Text to analyse; it is wrapped as ``{"text": mystr}`` and sent
            as the JSON request body.
        mytoken: Access token, passed as the ``access_token`` query parameter.

    Returns:
        None.  Prints the text echoed back in the reply, the positive
        probability of its first item, and a 40-dash separator line.

    Raises:
        RuntimeError: If the reply has no ``text`` field or no ``items``
            (for example an error reply), with the raw reply in the message.
    """
    # JSON request body carrying the text to analyse.
    content_txt = json.dumps({"text": mystr})

    # Fixed header: the body is JSON.
    header = {'Content-Type': 'application/json'}

    # Endpoint and query parameters; letting requests build the query string
    # also takes care of URL-encoding the token.
    url1 = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify'
    query = {'charset': 'UTF-8', 'access_token': mytoken}

    # A timeout keeps one stuck request from stalling a whole batch.
    results = requests.post(
        url=url1, params=query, headers=header, data=content_txt, timeout=10
    ).json()

    # Fail with the service's reply rather than a bare KeyError/IndexError.
    if 'text' not in results or not results.get('items'):
        raise RuntimeError('sentiment_classify request failed: {}'.format(results))

    input_text = results['text']
    positive_prob = results['items'][0]['positive_prob']

    # Show the text followed by its positive-sentiment probability.
    # Optional: call process_bar(positive_prob) here for a text visualisation.
    print(input_text)
    print(positive_prob)

    # Separator between entries when several texts are processed in a row.
    print('-' * 40)


# Sample passages used as batch input; each one is a three-line string.
str1 = """曾经有一段真挚的爱情摆在我面前，我没有珍惜，等我失去的时候，我才后悔莫及。
人世间最痛苦的事莫过于此。如果上天能够给我一个，再来一次的机会，我会对那个女孩子说三个字：我爱你。
如果非要在这份爱上加一个期限，我希望是一万年。"""
str2 = """冯唐易老，李广难封，屈贾谊于长沙，非无圣主；
窜梁鸿于海曲，岂乏明时？所赖君子见机，达人知命。
老当益壮，宁移白首之心，穷且益坚，不坠青云之志。"""
str3 = """我走了很远的路，吃了很多苦，才将这段视频送到你的面前，
一路风雨泥泞，许多不易，如梦一场，仿佛昨日，
如果能得到你的一键三连，那将是很好的。如果获得投币，更是意外之喜，欢心之至。"""

# The passages are analysed in this order.
txt_list = [str1, str2, str3]

# Analyse each passage in turn, reusing the access token fetched earlier in
# this script.
for mytxt in txt_list:
    txt_mark(mytxt, mytoken)

P3优化最终版

import aip
from xlrd import open_workbook   #用来读取Excel
import time

def sentiment_classify(txt):
    """Return the positive-sentiment probability reported for *txt*.

    Builds an ``aip.nlp.AipNlp`` client from the three credentials below and
    calls its ``sentimentClassify`` method.

    Args:
        txt: Text to analyse.

    Returns:
        The ``positive_prob`` value of the first entry in the reply's
        ``items`` list.

    Raises:
        RuntimeError: If the reply contains no ``items`` (for example an
            error reply), with the raw reply in the message.
    """
    # Placeholder credentials: replace with the application's real values.
    client_appId = '**'
    client_ak = '**'
    client_sk = '**'
    my_nlp = aip.nlp.AipNlp(client_appId, client_ak, client_sk)

    results = my_nlp.sentimentClassify(txt)
    # Fail with the service's reply rather than a bare KeyError.
    if not results.get('items'):
        raise RuntimeError('sentimentClassify failed: {}'.format(results))
    return results['items'][0]['positive_prob']


# Open the workbook (the path is a placeholder to be filled in).
work_book = open_workbook(r'文件路径')
# All sheet names in the workbook; handy to print when checking the file.
sheet_name = work_book.sheet_names()
# Select the worksheet to read by its name.
sheet_by_name = work_book.sheet_by_name('Sheet1')
# Values of the first column of that worksheet.
txt_content = sheet_by_name.col_values(0)

# Tally of texts scoring above 0.5 (positive) versus the rest (negative).
positive_times = 0
negative_times = 0
for txt in txt_content:
    # Pause one second before each request -- presumably to stay under the
    # service's rate limit; confirm against the account's quota.
    time.sleep(1)
    score = sentiment_classify(txt)
    if not score > 0.5:
        negative_times += 1
        continue
    positive_times += 1

summary = "分析完成，正向{}条，负向{}条".format(positive_times, negative_times)
print(summary)

P4优化最终版不改啦

import aip
import time
import pandas as pd
from tqdm import tqdm

# Module-level accumulators filled by sentiment_classify(): one entry per
# successfully analysed text, kept in call order so the three lists line up.
content_list = []
positive_prob_list = []
negative_prob_list = []


def sentiment_classify(txt):
    """Analyse *txt*, record the result, and return its positive probability.

    Builds an ``aip.nlp.AipNlp`` client from the three credentials below and
    calls its ``sentimentClassify`` method.  On success the text and both
    probabilities are appended to ``content_list``, ``positive_prob_list``
    and ``negative_prob_list``.

    Args:
        txt: Text to analyse.

    Returns:
        The ``positive_prob`` value of the first entry in the reply's
        ``items`` list.

    Raises:
        RuntimeError: If the reply contains no ``items`` (for example an
            error reply), with the raw reply in the message.  Nothing is
            appended to the accumulator lists in that case.
    """
    # Placeholder credentials: replace with the application's real values.
    client_appId = '**'
    client_ak = '**'
    client_sk = '**'
    my_nlp = aip.nlp.AipNlp(client_appId, client_ak, client_sk)

    results = my_nlp.sentimentClassify(txt)
    # Fail with the service's reply rather than a bare KeyError.
    if not results.get('items'):
        raise RuntimeError('sentimentClassify failed: {}'.format(results))

    first_item = results['items'][0]
    positive_prob = first_item['positive_prob']
    negative_prob = first_item['negative_prob']

    # Record only after both values were read, so the lists stay aligned.
    content_list.append(txt)
    positive_prob_list.append(positive_prob)
    negative_prob_list.append(negative_prob)
    return positive_prob


# Load the input spreadsheet with pandas (the path is a placeholder) and take
# its "content" column as the texts to analyse.
df = pd.read_excel(r'文件路径')
txt_content = df["content"]

# Tally of texts scoring above 0.5 (positive) versus the rest (negative);
# tqdm wraps the column to show a progress bar.
positive_times = 0
negative_times = 0
for txt in tqdm(txt_content):
    # Pause one second before each request -- presumably to stay under the
    # service's rate limit; confirm against the account's quota.
    time.sleep(1)
    score = sentiment_classify(txt)
    if not score > 0.5:
        negative_times += 1
        continue
    positive_times += 1

# Assemble the per-text results collected by sentiment_classify() into a
# table, one column per list.
result_columns = {
    "content": content_list,
    "positive_prob": positive_prob_list,
    "negative_prob": negative_prob_list,
}
df_result = pd.DataFrame(result_columns)
# Export the table to an Excel file (the output path is a placeholder).
df_result.to_excel(r'输出文件路径')

summary = "分析完成，正向{}条，负向{}条".format(positive_times, negative_times)
print(summary)