"""Build a keyword co-occurrence network from news text and export it as GML.

The script reads the ``cleaned_content`` column of an Excel sheet, picks the
30 most frequent terms over the whole corpus with ``CountVectorizer``, and
links two terms with an undirected edge whenever they occur in the same row.
The edge ``weight`` is the number of rows in which both terms appear.  The
resulting graph is written to ``keywords_network.gml``.
"""
from itertools import combinations

import networkx as nx
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

# Read the Excel file.
file_path = 'cleaned_Laos_news.xlsx'
sheet_name = 'Sheet1'  # Change to the actual worksheet name.
data = pd.read_excel(file_path, sheet_name=sheet_name)

# Replace NaN with an empty string.  Assign the result back instead of calling
# ``fillna(..., inplace=True)`` on the column selection: the in-place form acts
# on an intermediate object and does not update ``data`` under pandas
# Copy-on-Write.  ``astype(str)`` keeps stray non-text cells from breaking the
# ``join`` below.
data['cleaned_content'] = data['cleaned_content'].fillna('').astype(str)

# Merge every document into one string so term frequencies are corpus-wide.
all_text = ' '.join(data['cleaned_content'])

# Extract the most frequent terms with CountVectorizer.
vectorizer = CountVectorizer(min_df=1, max_features=30)
X = vectorizer.fit_transform([all_text])
words = vectorizer.get_feature_names_out()

# Create the undirected graph and add one node per keyword, so keywords that
# never co-occur with another still appear in the export.
G = nx.Graph()
G.add_nodes_from(words)

# Tokenize each document with the same analyzer that produced the vocabulary
# (lowercasing + token pattern).  A raw substring test would be case-sensitive
# against a lowercased vocabulary and would also match inside longer words.
analyzer = vectorizer.build_analyzer()
vocabulary = set(words)

# Count keyword co-occurrences per document and add/update the edges.
for text in data['cleaned_content']:
    present = vocabulary.intersection(analyzer(text))
    # Walk ``words`` (not the set) so pairs are produced in a stable order.
    keywords = [word for word in words if word in present]
    for word1, word2 in combinations(keywords, 2):
        if G.has_edge(word1, word2):
            G[word1][word2]['weight'] += 1
        else:
            G.add_edge(word1, word2, weight=1)

# Export the graph as a GML file with NetworkX.
nx.write_gml(G, 'keywords_network.gml')

# Print a short notice for the user.
print("Graph exported as keywords_network.gml")
print("You can import this file into Gephi for visualization.")
Python制作无向图
最新推荐文章于 2024-04-26 01:45:18 发布