统计学科相互引用表中的数据,生成学科之间引用次数的矩阵。
这一篇与前面那篇“列转行关联矩阵”有些类似,只是前一篇使用 MySQL 作为处理工具,而这一篇采用 Python 来进行处理。
table2 的格式如下:字段 re_sub 为引文所在学科,ar_sb 为文章所在学科,只需要这两个字段即可。
计算学科引用次数,并将其填充到Excel表格中,做成矩阵形式
import pymysql.cursors
import logging
# Route every record of level DEBUG and above to the file log.log,
# prefixing each line with a timestamp, the logger name and the level.
logging.basicConfig(
    level=logging.DEBUG,
    filename='log.log',
    datefmt='%Y-%m-%d %H:%M:%S %p',
    format='%(asctime)s -%(name)s-%(levelname)s:%(message)s',
)
def getsubjectcount(connection, subjectA, subjectB):
    """Count the rows of `table2` linking an article subject to a cited subject.

    Args:
        connection: An open database connection whose ``cursor()`` result can
            be used as a context manager (e.g. a pymysql connection).
        subjectA: Value matched against the ``ar_sb`` column.
        subjectB: Value matched against the ``re_sub`` column.

    Returns:
        The ``count(*)`` value reported by the database for that pair.
    """
    sql = "select count(*) from `table2` where ar_sb=%s and re_sub=%s;"
    with connection.cursor() as cursor:
        # The values are passed separately so the driver does the escaping.
        cursor.execute(sql, (subjectA, subjectB))
        result = cursor.fetchone()
    # Dict-style cursors key the row by column name; tuple-style cursors
    # return the single column positionally. Support both.
    if isinstance(result, dict):
        return result['count(*)']
    return result[0]
def readDataByExcle(inPutFile, Sheet):
    """Read the subject names stored in column A of an Excel worksheet.

    Args:
        inPutFile: Path of the workbook to load.
        Sheet: Name of the worksheet to read from.

    Returns:
        list: The values of cells A1..A<max_row> in row order, with empty
        (``None``) cells skipped.
    """
    from openpyxl import load_workbook

    wb = load_workbook(inPutFile)
    sheet = wb[Sheet]
    print('读取数据完毕!')
    values = (sheet["A" + str(row)].value for row in range(1, sheet.max_row + 1))
    # Identity comparison with None: only truly empty cells are dropped,
    # values such as 0 or "" are kept.
    dataArray = [subject for subject in values if subject is not None]
    return dataArray
def writeDataToExcleFile(inputData, outPutFile):
    """Write a mapping of rows to a new Excel workbook, one key per row.

    Each key is written to column 1 of its row and the items of its value
    are written to columns 2, 3, ... of the same row, on a sheet titled
    "Sheet1". Rows follow the iteration order of ``inputData``.

    Args:
        inputData: Mapping from a row label to an iterable of cell values.
        outPutFile: Output file name, for example ``'data.xlsx'``.
    """
    from openpyxl import Workbook

    wb = Workbook()
    sheet = wb.active
    sheet.title = "Sheet1"
    for rowIndex, (key, values) in enumerate(inputData.items(), start=1):
        # Keyword arguments keep the call unambiguous regardless of the
        # positional parameter order of Worksheet.cell().
        sheet.cell(row=rowIndex, column=1).value = key
        for colIndex, item in enumerate(values, start=2):
            sheet.cell(row=rowIndex, column=colIndex).value = item
    wb.save(outPutFile)
    print('数据写入完毕!')
def doJob(inputFile='F:/GEV/lda_dir/subject.xlsx',
          sheetName='Sheet1',
          outputFile='F:/GEV/lda_dir/subjectout11.xlsx'):
    """Build the subject-by-subject citation count matrix and save it to Excel.

    Reads the subject list from ``inputFile``, queries the database once for
    every ordered pair of subjects, and writes one row per subject (the
    subject name followed by its counts against every subject, in list
    order) to ``outputFile``.

    Args:
        inputFile: Workbook holding the subject names in column A.
        sheetName: Worksheet of ``inputFile`` to read.
        outputFile: Path of the workbook to create with the matrix.
    """
    # NOTE(review): connection settings are hard-coded; consider moving them
    # to configuration.
    connection = pymysql.connect(host='localhost',
                                 user='root',
                                 password='root',
                                 db='db2',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        subjectdata = readDataByExcle(inputFile, sheetName)
        data = {}
        for subjectA in subjectdata:
            print(subjectA)
            # One query per (subjectA, subjectB) pair, in subject-list order.
            data[subjectA] = [
                getsubjectcount(connection, subjectA, subjectB)
                for subjectB in subjectdata
            ]
            # %-style arguments avoid a TypeError when a subject cell holds a
            # non-string value (e.g. a number) and defer the formatting.
            logging.info('data=>%s=>%s', subjectA, data[subjectA])
        writeDataToExcleFile(data, outputFile)
    finally:
        # Always release the connection, even if a query or file step fails.
        connection.close()
# Run the job at module load time (there is no __main__ guard).
doJob()
其中学科名称存放在 Excel 文件中;结果如下,表头为手动添加。