合作者推荐第一步 k-means 聚类

最新推荐文章于 2020-08-29 10:21:39 发布

qq_27047075

最新推荐文章于 2020-08-29 10:21:39 发布

阅读量156

点赞数

分类专栏：代码

本文链接：https://blog.csdn.net/qq_27047075/article/details/100595466

版权

代码专栏收录该内容

8 篇文章 0 订阅

订阅专栏

第一步：得到每位作者的两项数据：发表的论文数（n_pubs）和论文被引用的总次数（n_citation）

# -*- coding: utf-8 -*-
"""
Created on Thu Sep  5 10:40:57 2019

@author: Administrator
"""
import json

# Extract the publication count and citation count of each author from the
# MAG author dump and append them to demo3.txt as "<n_pubs> <n_citation>"
# lines. Every non-blank input line is parsed as one JSON object that must
# contain the keys 'n_pubs' and 'n_citation' (a missing key raises KeyError).
MAX_LINES = 10000  # upper bound on the number of input lines consumed

# The source file is only read, so plain 'r' is enough. The output stays in
# append mode: running the script again adds to demo3.txt instead of
# replacing it. The with-statement closes both files even if parsing fails.
with open("D:\\mag_authors_2\\mag_authors_10.txt", 'r') as fo, \
        open('demo3.txt', 'a') as fi:
    for line_no, line in enumerate(fo):
        if line_no >= MAX_LINES:
            break
        if not line.strip():
            # Blank line: json.loads would raise on it, so skip it.
            continue
        js = json.loads(line)
        n_pubs = str(js['n_pubs'])
        n_citation = str(js['n_citation'])
        fi.write(n_pubs + ' ' + n_citation + '\n')
        # A single pre-formatted argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % (n_pubs, n_citation))

数据示例
在这里插入图片描述
第二步：进行聚类，当然是 K-means 算法啦

# -*- coding: utf-8 -*-
"""
Created on Wed Sep  4 13:06:59 2019

@author: Administrator
"""

import numpy as np
import numpy.matlib 
import math as mt
import matplotlib.pyplot as plt
def d(a1,a2):
    """Return the Euclidean distance between two points.

    Args:
        a1: First point; a NumPy array or any array-like (list, tuple).
        a2: Second point, broadcast-compatible with ``a1``.

    Returns:
        The square root of the sum of squared coordinate differences.

    Both arguments are passed through ``np.asarray`` before subtracting, so
    two plain Python lists are accepted as well (a bare ``list - list``
    would raise ``TypeError``).
    """
    diff = np.asarray(a1) - np.asarray(a2)
    return np.sqrt(np.sum(np.abs(diff) ** 2))
def getlist(path="demo3.txt"):
    """Read 2-D points from a text file.

    Each non-blank line must hold at least two whitespace-separated numbers;
    the first two are converted to ``float`` and become one point. Any
    further fields on the line are ignored.

    Args:
        path: File to read. Defaults to ``"demo3.txt"``.

    Returns:
        A list with one 2-element ``numpy.ndarray`` per non-blank line, in
        file order.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If one of the first two fields is not a number.
    """
    X = []
    # Read-only mode is sufficient, and the with-statement guarantees the
    # handle is closed once the points are loaded.
    with open(path, 'r') as fo:
        for line in fo:
            z = line.split()
            if not z:
                # Blank line (e.g. trailing newline at end of file): skip.
                continue
            X.append(np.array([float(z[0]), float(z[1])]))
    return X



# K-means clustering (k = 3) of the points returned by getlist(), with a plot
# of the centroid trajectories and of the final cluster members.
X = getlist()

# Rows of X used as the three starting centroids.
SEED_INDICES = (4, 9, 7)
if len(X) <= max(SEED_INDICES):
    raise IndexError(
        'need at least %d points to seed the centroids, got %d'
        % (max(SEED_INDICES) + 1, len(X)))

# matplotlib format strings, one per cluster: centroid markers and members.
CENTROID_FMTS = ('xb', 'xr', 'xg')
MEMBER_FMTS = ('^b', 'or', 'og')

centroids = [X[idx] for idx in SEED_INDICES]
# Centroid coordinates recorded at every iteration, used to draw the paths.
paths_x = [[] for _ in centroids]
paths_y = [[] for _ in centroids]
clusters = [[] for _ in centroids]

while True:  # iterate until no centroid moves
    previous = list(centroids)

    # Record and draw the current centroids.
    for k, centre in enumerate(centroids):
        paths_x[k].append(centre[0])
        paths_y[k].append(centre[1])
        plt.plot(centre[0], centre[1], CENTROID_FMTS[k])

    # Assignment step: each point joins the cluster of its nearest centroid.
    # On equal distances the lowest-numbered cluster wins, because min()
    # and list.index() both return the first match.
    clusters = [[] for _ in centroids]
    for point in X:
        distances = [d(centre, point) for centre in centroids]
        clusters[distances.index(min(distances))].append(point)

    # Update step: a non-empty cluster moves its centroid to the mean of its
    # members; an empty cluster keeps its previous centroid.
    centroids = []
    for members, old_centre in zip(clusters, previous):
        if members:
            count = len(members)
            centroids.append([sum(p[0] for p in members) / count,
                              sum(p[1] for p in members) / count])
        else:
            centroids.append(old_centre)

    # A single pre-formatted argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('%s %s %s' % tuple(centroids))

    # Converged when every centroid equals its value from the last round.
    if all(np.array_equal(new, old)
           for new, old in zip(centroids, previous)):
        print('收敛')
        break

# Keep the original result names bound at module level.
D1, D2, D3 = clusters
a1, a2, a3 = centroids
print(D1)
print(D2)
print(D3)

# Draw the path each centroid travelled.
for xs, ys in zip(paths_x, paths_y):
    plt.plot(xs, ys)

# Draw the members of each cluster. An empty cluster is skipped: indexing
# the transposed empty array would raise IndexError.
for members, fmt in zip(clusters, MEMBER_FMTS):
    if not members:
        continue
    xs, ys = np.array(members).T
    plt.plot(xs, ys, fmt, markersize=1)
plt.show()

最后结果：
在这里插入图片描述