PageRank算法Python实现

最新推荐文章于 2024-04-15 15:28:53 发布

kedaya的代码屋

最新推荐文章于 2024-04-15 15:28:53 发布

阅读量1k

点赞数

文章标签：算法 python

本文链接：https://blog.csdn.net/m0_51635704/article/details/128467376

版权

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
def getGm(A):
    '''
    功能：求状态转移概率矩阵Gm
    @A：网页链接图的邻接矩阵
    '''
    Gm = []
    for i in range(len(A)):
        cnt = 0
        for j in range(len(A[i])):
            if A[i][j] != 0:
                cnt += 1
        tran_prob = 1 / cnt  # 转移概率
        Gm_tmp = []
        for j in range(len(A[i])):
            Gm_tmp.append(tran_prob * A[i][j])
        Gm.append(Gm_tmp)
    Gm = np.transpose(Gm)
    return Gm
def getBaseLev(N):
    '''
    功能：计算网页所获得的基本级别(1-P)*e/n
    @N：网页总个数
    '''
    P = 0.85
    e = np.ones(N)
    R = [[(1 - P) * i * 1 / N] for i in e]
    return R
def getPR(P, Gm, R, PR):
    '''
    功能：获取PR值
    @P：加权系数，通常取 0.85 左右,按照超链接进行浏览的概率
    @Gm：状态转移概率矩阵
    @R：网页所获得的基本级别
    @PR：每个网页节点的PageRank值
    '''
    # 状态转移概率矩阵Gm与PR值相乘矩阵相乘
    Gm_PR = np.dot(Gm, PR)
    # 矩阵乘以常数P
    P_Gm_PR = P * Gm_PR
    # 矩阵相加
    new_PR = P_Gm_PR + R  # PR=P*Gm'PR+(1-d)*e/n PageRank算法的核心
    return new_PR
def res_vis(A, PR):
    '''
    将计算出来的值进行可视化展示
    @A：网页链接图的邻接矩阵
    @PR：每个网页节点最终的PageRank值
    '''
    # G=nx.Graph()构造的是无向图， G=nx.DiGraph()构造的是有向图
    # 初始化有向图，节点数为7,edge（边）被创造的随机概率
    all_edges = []
    for i in range(7):
        for j in range(len(A)):
            if A[i][j] == 1:
                all_edges.append([i + 1, j + 1])
                # （1）初始化有向图
    G = nx.DiGraph()
    # （2）添加节点
    G.add_nodes_from(range(1, len(A)))
    # （3）添加有向边
    G.add_edges_from(all_edges)
    # （4）添加PR值
    pr = {}
    for i in range(len(PR)):
        pr[i + 1] = PR[i][0]
    # (5)画图
    layout = nx.spring_layout(G)
    plt.figure(1)
    nx.draw(G, pos=layout, node_size=[x * 6000 for x in pr.values()],
            node_color='m', with_labels=True)
    plt.show()
def main():
    # 初始化参数
    N = 7  # 网页个数
    P = 0.85  # 一个加权系数，通常取 0.85 左右,按照超链接进行浏览的概率
    # 网页链接图的邻接矩阵，每一列表示一个网页的出度
    A = np.array([[0, 1, 1, 0, 1, 1, 0],
                  [1, 0, 1, 1, 0, 0, 0],
                  [1, 0, 0, 1, 1, 0, 0],
                  [1, 0, 0, 0, 1, 0, 0],
                  [1, 0, 0, 1, 0, 1, 1],
                  [0, 0, 0, 0, 1, 0, 0],
                  [1, 0, 0, 0, 0, 0, 0]])
    A = np.transpose(A)  # 转置
    # 初始化PR值为0
    new_PR = []
    for i in range(N):
        new_PR.append([0])
    count = 0  # 迭代计数器
    while True:
        PR = new_PR
        R = getBaseLev(N)
        Gm = getGm(A)
        new_PR = getPR(P, Gm, R, PR)
        count = count + 1
        print("第 %s 轮迭代" % count)
        print(str(round(new_PR[0][0], 5))
              + "\t" + str(round(new_PR[1][0], 5))
              + "\t" + str(round(new_PR[2][0], 5))
              + "\t" + str(round(new_PR[3][0], 5))
              + "\t" + str(round(new_PR[4][0], 5))
              + "\t" + str(round(new_PR[5][0], 5))
              + "\t" + str(round(new_PR[6][0], 5)))
        # 设置迭代条件
        if (round(PR[0][0], 5) == round(new_PR[0][0], 5)
                and round(PR[1][0], 5) == round(new_PR[1][0], 5)
                and round(PR[2][0], 5) == round(new_PR[2][0], 5)
                and round(PR[3][0], 5) == round(new_PR[3][0], 5)
                and round(PR[4][0], 5) == round(new_PR[4][0], 5)
                and round(PR[5][0], 5) == round(new_PR[5][0], 5)
                and round(PR[6][0], 5) == round(new_PR[6][0], 5)):
            break
    print("-------------------")
    print("PageRank值已计算完成")
    res_vis(A, new_PR)
if __name__ == '__main__':
    main()