传播经管前沿知识,数智赋能商业实践@经管有数
一、数据清洗
原始数据来源:知网(https://www.cnki.net/)《经济研究》、《管理世界》、《管理科学学报》2011-2021年论文数据
# Load the exported journal-paper spreadsheet; the path is relative to the working directory.
# NOTE(review): `pd` is not imported in the visible source — presumably pandas, imported elsewhere.
data = pd.read_excel('data/Case1/顶刊论文.xlsx')
# Bare expression: shown as cell output in a notebook, no visible effect when run as a script.
data
1.1 机构数据
def getlist(data):
    """Split a ';'-separated string into its non-empty segments.

    Args:
        data: Raw cell value such as ``'A;B;'``. Non-string values (for
            example the NaN that pandas uses for a blank spreadsheet cell,
            or ``None``) are treated as "no entries".

    Returns:
        The non-empty segments in their original order, or an empty list
        when ``data`` is not a string.
    """
    if not isinstance(data, str):
        # A blank cell has no `.split`; return an empty list instead of
        # letting `Series.apply` abort on the first missing value.
        return []
    return [part for part in data.split(';') if part]
from collections import Counter

# Turn each row's raw 'Organ' string into a list of institution names.
data['Organ_list'] = data['Organ'].apply(getlist)

# Flatten the per-row lists into one list, keeping repeats so that
# occurrences can be counted below.
Organ_list = [organ for organs in data['Organ_list'] for organ in organs]
print(len(Organ_list))       # total institution mentions
print(len(set(Organ_list)))  # number of distinct institutions

# Occurrences per institution. Counter tallies in a single pass instead of
# calling list.count() once per distinct name (which rescans the whole list
# every time). Stored as a plain dict, as before.
Organ_dict = dict(Counter(Organ_list))
# Bare expression: shown as cell output in a notebook.
Organ_dict
1.2 大学数据
import re
# Lazily matches the shortest leading text that ends in '大学' ("university").
_UNIVERSITY_RE = re.compile('(.*?)大学')


def getuniversity(data):
    """Extract the distinct university names from institution strings.

    For every string in ``data`` the first (shortest) span ending in '大学'
    is taken as the university name; strings without '大学' contribute
    nothing.

    Args:
        data: Iterable of institution name strings.

    Returns:
        List of unique university names in order of first appearance. The
        order is deterministic, unlike de-duplicating through a ``set``,
        whose iteration order for strings can change between runs.
    """
    first_hits = (_UNIVERSITY_RE.search(organ) for organ in data)
    # group(0) is the captured prefix plus the literal '大学' suffix.
    names = (hit.group(0) for hit in first_hits if hit)
    # dict.fromkeys removes duplicates while preserving first-seen order.
    return list(dict.fromkeys(names))
# Derive each row's de-duplicated university names from its institution list.
data['University_list'] = data['Organ_list'].apply(getuniversity)

# Concatenate the per-row lists into one flat list; a university that
# appears in several rows is kept once per row.
University_list = [
    university
    for row_universities in data['University_list']
    for university in row_universities
]

# Report the total number of mentions, then the number of distinct names.
print(len(University_list))
print(len(set(University_list)))
二、共现矩阵
def build_matrix(nodes):
    """Create an empty labelled square grid for a co-occurrence matrix.

    The grid has ``len(nodes) + 1`` rows and columns. Cell ``[0][0]`` is an
    empty string, the remainder of the first row and of the first column
    hold the node labels, and every interior cell is an empty-string
    placeholder.

    Args:
        nodes: Iterable of node labels.

    Returns:
        A list of independent row lists (no row object is shared).
    """
    # The labels are used exactly as given: no detour through a numpy array
    # (which would coerce mixed-type labels and yield numpy scalars) and no
    # transpose is needed to fill the header row and column.
    labels = list(nodes)
    header = [''] + labels
    body = [[label] + [''] * len(labels) for label in labels]
    return [header] + body
# Build the empty labelled grid for the node list.
# NOTE(review): `nodeslist` is not defined anywhere in the visible source — presumably the
# distinct university names; confirm where it is created.
University_matrix = build_matrix(nodeslist)
# Bare expression: wraps the grid in a DataFrame for notebook display; the result is not stored.
pd.DataFrame(University_matrix)
# Co-occurrence matrix
def count_matrix(matrix, array_data):
    """Fill a labelled square grid with pairwise co-occurrence counts.

    ``matrix`` is expected in the layout where row 0 and column 0 carry the
    labels. For every interior cell, the value becomes the number of records
    in ``array_data`` that contain both the cell's column label and its row
    label (tested with ``in``). Cells whose two labels are equal are set to
    0, so the diagonal is zero.

    Args:
        matrix: List of row lists; modified in place.
        array_data: Iterable of records, each supporting ``label in record``.

    Returns:
        The same ``matrix`` object, now holding the counts.
    """
    # Materialize once so a one-shot iterator is not exhausted after the
    # first pair of labels.
    records = list(array_data)
    size = len(matrix)

    def _records_containing(label):
        # Indices of the records in which `label` occurs.
        return {idx for idx, record in enumerate(records) if label in record}

    # Scan the records once per label instead of once per matrix cell.
    # Keyed by position, so labels do not need to be hashable.
    header_hits = {k: _records_containing(matrix[0][k]) for k in range(1, size)}
    index_hits = {k: _records_containing(matrix[k][0]) for k in range(1, size)}

    for head in range(1, size):
        for idx in range(1, size):
            if matrix[0][head] == matrix[idx][0]:
                # Same label on both axes: co-occurrence with itself is 0.
                matrix[idx][head] = 0
            else:
                # Records containing both labels = intersection of the two
                # per-label index sets.
                matrix[idx][head] = len(header_hits[head] & index_hits[idx])
    return matrix
# Fill the grid with co-occurrence counts and wrap it in a DataFrame.
# count_matrix writes into University_matrix itself and returns that same object.
# NOTE(review): `University_array` is not defined in the visible source — presumably the
# per-paper university lists; confirm where it is created.
NXdata = pd.DataFrame(count_matrix(University_matrix,University_array))
# Bare expression: shown as cell output in a notebook.
NXdata
三、网络图
import os

# Choose a CJK-capable font before anything is drawn, so the setting is in
# place for every label rather than depending on when fonts get resolved.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Build the graph from an edge list with 'from', 'to' and 'weight' columns.
# NOTE(review): `sample` is not defined in the visible source; `nx` and `plt`
# are presumably networkx and matplotlib.pyplot imported elsewhere — confirm.
g = nx.from_pandas_edgelist(sample, 'from', 'to', edge_attr='weight')
edges = g.edges()
# Line width per edge: the edge weight scaled down by 5.
weights = [g[u][v]['weight'] / 5 for u, v in edges]
# No seed is passed, so node positions vary from run to run.
pos = nx.random_layout(g)

plt.figure(figsize=(40, 40), dpi=400)
nx.draw_networkx(
    g,
    pos,
    with_labels=True,
    width=weights,
    node_size=20,
    font_size=20,
    edge_color='b',
    font_color='r',
    alpha=1,
)
plt.axis('off')

# savefig does not create missing directories; make sure the target exists.
os.makedirs('output', exist_ok=True)
plt.savefig('output/关键词共同现网络.png', dpi=390)
plt.show()