# coding:utf-8
from pyspark import SparkContext

sc = SparkContext(appName='pr')

# Adjacency list: node -> list of outbound neighbours.
links = [['A', ['B', 'C']], ['B', ['A', 'C']], ['C', ['A', 'B', 'D']], ['D', ['C']]]
# partitionBy + persist: the link table is joined with `ranks` on every
# iteration, so co-partitioning and caching it avoids a reshuffle each pass.
links = sc.parallelize(links).partitionBy(4).persist()

# Initial rank is 1.0 for every node. It must be a float (not int 1),
# because it is later divided by the out-degree to compute contributions.
ranks = links.mapValues(lambda x: 1.0)

# Power iteration (10 passes): each node splits its current rank evenly
# across its out-links, then contributions are summed per destination and
# damped with the standard factor 0.85 (teleport term 0.15).
for i in range(10):
    contri = links.join(ranks).flatMap(
        # x = (node, (neighbours, rank)); emit (dest, rank / out_degree)
        lambda x: [(dest, x[1][1] / len(x[1][0])) for dest in x[1][0]]
    )
    ranks = contri.reduceByKey(lambda x, y: x + y).mapValues(lambda x: 0.15 + 0.85 * x)

# Fix: the original print statement was truncated (`print('%s==:'`) and
# would not parse. Collect the final ranks to the driver and print them.
for node, rank in ranks.collect():
    print('%s==:%s' % (node, rank))

sc.stop()
# PageRank implemented in Python on Spark.
# (Source article originally published 2022-06-04 11:01:01.)