Sentiment Analysis and Gephi Graph Analysis

Part 1

In this post, a simple sentiment analysis is implemented: the text in a file is split into sentences, each sentence is analyzed word by word, and each sentence is given a score: 1 for positive, 0 for neutral, and -1 for negative.

A sentiment dictionary is used: each adjective (or, for sentences that contain no adjectives, each verb) is looked up in the dictionary and its polarity is recorded. One score is output per sentence.
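The dictionary file itself is not shown in the original post; judging from the lookup code below, wordcsv_simp.csv is assumed to contain Word, Pos, and Neg columns, possibly with several rows per word (one per word sense), whose scores are averaged. A hypothetical excerpt:

Word,Pos,Neg
good,0.75,0.0
good,0.625,0.0
terrible,0.0,0.875
table,0.0,0.0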

The code is as follows:

import nltk
from nltk.tokenize import sent_tokenize
import csv
import numpy

def getposneg(searchword):
    # Look the word up in the sentiment dictionary and return 1 (positive),
    # -1 (negative) or 0 (neutral / not found).
    pos = []
    neg = []
    with open("wordcsv_simp.csv", "r") as wordcsv_file:
        for row in csv.DictReader(wordcsv_file):
            if row['Word'] == searchword:
                pos.append(float(row['Pos']))
                neg.append(float(row['Neg']))

    if not pos:  # word not found in the dictionary
        return 0
    # average over all senses of the word, with a small neutral band
    if numpy.mean(pos) - numpy.mean(neg) > 0.1:
        return 1
    elif numpy.mean(pos) - numpy.mean(neg) < -0.1:
        return -1
    else:
        return 0
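# Quick illustration (assuming the hypothetical dictionary excerpt above):
# getposneg("good") -> 1, getposneg("terrible") -> -1, getposneg("table") -> 0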

infile = open('1155099711.txt')
text = infile.read()
outfile = open('sentiment_scores.txt', 'a')

# Replace commas with periods so sent_tokenize also breaks sentences at commas
text = text.replace(",", ".")

sent_tokenize_list = sent_tokenize(text)
lenSent = len(sent_tokenize_list)   # number of sentences

# Negation words: if one of these appears within the two words before an
# adjective, the adjective's polarity is flipped (the list could be extended)
negative_prefix_list = ['no','not','nor','none','never','nothing','neither','hardly','barely','rarely','scarcely','little','few','seldom']

# examine each sentence one by one
for k in range(lenSent):
    total_score = 0
    outfile.write(sent_tokenize_list[k] + '\n')
    text_tok = nltk.word_tokenize(sent_tokenize_list[k])

    Tag = nltk.pos_tag(text_tok)
    lenTag = len(Tag)

    np = []                             # noun phrases found in this sentence
    flag = [1 for i in range(lenTag)]   # 1 = token not yet grouped into a phrase
    nlist = []                          # index of the last token of each noun phrase

    # try to group consecutive nouns ('NN' / 'NNP') into noun phrases;
    # the t + 1 / t + 2 bounds checks avoid an IndexError at the end of the sentence
    for t in range(lenTag):
        if flag[t] == 1:
            if Tag[t][1] == "NN":
                if t + 1 < lenTag and Tag[t+1][1] == "NN":
                    np.append(text_tok[t] + " " + text_tok[t+1])
                    flag[t+1] = 0
                    flag[t] = 0
                    nlist.append(t+1)
                else:
                    flag[t] = 0
                    np.append(text_tok[t])
                    nlist.append(t)
            elif Tag[t][1] == "NNP":
                if t + 2 < lenTag and Tag[t+1][1] == "NNP" and Tag[t+2][1] == "NN":
                    flag[t] = 0
                    flag[t+1] = 0
                    flag[t+2] = 0
                    np.append(text_tok[t] + " " + text_tok[t+1] + " " + text_tok[t+2])
                    nlist.append(t+2)
                elif t + 1 < lenTag and Tag[t+1][1] == "NN":
                    flag[t] = 0
                    flag[t+1] = 0
                    np.append(text_tok[t] + " " + text_tok[t+1])
                    nlist.append(t+1)
                else:
                    flag[t] = 0
                    np.append(text_tok[t])
                    nlist.append(t)

    #print "List of noun=%s"%np
    #print "Corresponding number=%s"%nlist

    # Find sentiment + check negation
    jlist = [];
    sentlist = [];#sentiments of adj and verbs
    for t in range(lenTag):
        if Tag[t][1] == "JJ" or Tag[t][1] =='JJS' or Tag[t][1] == 'JJR':   # find adjectives
            jlist.append(t)
            #print 'adj',text_tok[t]
            sentiment = getposneg(text_tok[t])
            #print 'adj sentiment',sentiment
            if text_tok[t-1] in negative_prefix_list or text_tok[t-2] in negative_prefix_list: # you should add more negative words
                sentiment *= -1
            total_score = total_score + sentiment # add every single sentiment score to total score
            if sentiment > 0:
                jj_sentiment = "positive"
            elif sentiment < 0:
                jj_sentiment = "negative"
            else:
                jj_sentiment = "neutral"
            sentlist.append(jj_sentiment)
            #print "Adj: %s at %s is %s"%(Tag[t][0],t,jj_sentiment)
    if len(jlist) == 0:   # no adjectives: fall back to verbs, at half weight
        for t in range(lenTag):
            if Tag[t][1] in ("VB", "VBD", "VBG", "VBN", "VBP", "VBZ"):
                sentiment = getposneg(text_tok[t])
                total_score = total_score + 0.5 * sentiment
                if sentiment > 0:
                    verb_sentiment = "positive"
                elif sentiment < 0:
                    verb_sentiment = "negative"
                else:
                    verb_sentiment = "neutral"
                sentlist.append(verb_sentiment)

    # clamp the sentence score to 1 / 0 / -1
    if total_score > 0:
        total_score = 1
    elif total_score < 0:
        total_score = -1
    else:
        total_score = 0

    input_string = ('Total score of this sentence: ' + str(total_score) + '\n'
                    + "---------------------------------------------" + '\n\n')
    outfile.write(input_string)

infile.close()
outfile.close()
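As a quick sanity check (not part of the original post), the same lookup and negation rule can be applied to a single hand-written sentence; the expected output assumes the dictionary marks "good" as positive:

# minimal sketch, reusing getposneg and negative_prefix_list from the script above
sentence = "The food was not good."
tokens = nltk.word_tokenize(sentence)
for t, (word, tag) in enumerate(nltk.pos_tag(tokens)):
    if tag in ("JJ", "JJS", "JJR"):
        s = getposneg(word)
        if (t >= 1 and tokens[t-1] in negative_prefix_list) or \
           (t >= 2 and tokens[t-2] in negative_prefix_list):
            s *= -1
        print(word, '->', s)   # expected: good -> -1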

Part 2

This part analyzes the social ties among a community of 40 people based on the links between nodes: it generates a node file and an edge file, imports them into Gephi, and draws the social network graph.
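The input file itself is not shown in the original post; from the way the code below indexes it, blogpost4.csv appears to be an adjacency matrix with one header row and no row labels: the entry in row i, column j (counting from 1 after the header) is the weight of the directed edge from person i to person j, with 0 meaning no edge. A hypothetical 3-person excerpt:

U1,U2,U3
0,2,0
1,0,0
0,3,0

Here person 1 links to person 2 with weight 2, person 2 links back to person 1 with weight 1, and person 3 links to person 2 with weight 3.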

The code is as follows:

import networkx as nx
import csv

h = nx.DiGraph()
for i in range(1, 41):
    h.add_node(i)

# read the adjacency matrix; row i (skipping the header row)
# holds the outgoing edges of person i
with open('blogpost4.csv', 'r') as f:
    blog = list(csv.reader(f))

for i in range(1, len(blog)):
    line = blog[i]
    for j in range(len(line)):
        if line[j] != '0':
            h.add_edge(i, j + 1, weight=int(line[j]))

# open with newline='' so csv.writer does not emit blank rows on Windows
with open('edges.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Row', 'Column', 'Weight'])
    for (u, v, d) in h.edges(data='weight'):
        writer.writerow([u, v, d])

# node table for Gephi
with open('N.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Id', 'Label'])
    for node in h.nodes():
        writer.writerow([int(node), 'U' + str(node)])

# edge table for Gephi
with open('E.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Source', 'Target', 'Type', 'Weight'])
    for (u, v, d) in h.edges(data='weight'):
        writer.writerow([u, v, 'Directed', d])
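# Side note: the column names used above (Id/Label for nodes, and
# Source/Target/Type/Weight for edges) are the ones Gephi recognises when
# these two CSV files are imported through its Data Laboratory.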


print('density:', nx.density(h))

# average each centrality measure over the 40 nodes (node 33 is the author)
avg_indegree = 0
for key, value in nx.in_degree_centrality(h).items():
    avg_indegree += value
    if key == 33:
        print('My own in-degree:', value)
avg_indegree = avg_indegree / 40
print('avg_indegree:', avg_indegree)

avg_outdegree = 0
for key, value in nx.out_degree_centrality(h).items():
    avg_outdegree += value
    if key == 33:
        print('My own out-degree:', value)
avg_outdegree = avg_outdegree / 40
print('avg_outdegree:', avg_outdegree)

avg_betweenness = 0
for key, value in nx.betweenness_centrality(h).items():
    avg_betweenness += value
    if key == 33:
        print('My own betweenness centrality:', value)
avg_betweenness = avg_betweenness / 40
print('avg_betweenness:', avg_betweenness)

avg_closeness = 0
for key, value in nx.closeness_centrality(h).items():
    avg_closeness += value
    if key == 33:
        print('My own closeness centrality:', value)
avg_closeness = avg_closeness / 40
print('avg_closeness:', avg_closeness)
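As an aside (a sketch, not part of the original script), each of these averages can be computed directly from the centrality dict returned by networkx, which avoids the four near-identical loops:

import statistics

indeg = nx.in_degree_centrality(h)
print('avg_indegree:', statistics.mean(indeg.values()))   # same value as the loop above
print('My own in-degree:', indeg[33])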

The resulting social network graph:

(Figure: social network of the 40 members, drawn in Gephi)
