Page rank

An implementation of the PageRank algorithm in Python.

Run: ./<script>.py -o <output file> -d <damping factor> -e <epsilon> <infile>

Data format (input file):

First line: the maximum node number (the script reads it as the second space-separated token on the line).

Remaining lines, one per node: `node id:out-link count` followed by the ids of the linked nodes, separated by single spaces (node ids are 1-based).



from __future__ import division
#!usr/bin/python

import sys
import getopt
import scipy.sparse as sp
from scipy.spatial import distance
from numpy import * 
import pdb


def receiveArguments(argv):
    """Parse the command-line arguments of the PageRank script.

    Expected form: ``-d <factor> -e <epsilon> -o <output file> <infile>``.

    Args:
        argv: argument list without the program name (e.g. ``sys.argv[1:]``).

    Returns:
        A tuple ``(dfactor, epsilon, output, infile)`` of raw, unconverted
        strings, where ``infile`` is the first positional argument.
        ``None`` is returned, after printing a short message, when getopt
        rejects the options (unknown option or option without its value)
        or when any of the three options or the input file is missing.
    """
    try:
        opts, args = getopt.getopt(argv, 'd:e:o:', [])
    except getopt.GetoptError:
        # Call form of print: same output on Python 2, valid on Python 3.
        print('Wrong arguments')
        return None

    # An empty string means "not supplied"; getopt only ever yields the
    # three option names declared above.
    values = {'-d': '', '-e': '', '-o': ''}
    for name, value in opts:
        values[name] = value
    dfactor = values['-d']
    epsilon = values['-e']
    output = values['-o']

    if dfactor == '' or epsilon == '' or output == '' or len(args) == 0:
        print('Lack arguments')
        return None
    return dfactor, epsilon, output, args[0]


def parseGraph(lines):
    """Turn the input file's lines into the data the iteration needs.

    Args:
        lines: list of text lines. ``lines[0]`` carries the node count as
            its second whitespace-separated token; every later line has
            the form ``nodeId:outCount target target ...`` (1-based ids).

    Returns:
        A tuple ``(smatrixA, addP)``:
        ``smatrixA`` is a ``maxNode x maxNode`` CSC matrix whose entry
        ``[target-1, source-1]`` is ``1/outCount`` for each listed link;
        ``addP`` is a length-``maxNode`` vector holding 1 for nodes without
        out-links (dangling nodes) and 0 for all others.
    """
    maxNode = int(lines[0].split()[1])

    # Every node starts out flagged as dangling; the flag is cleared once
    # the node is seen with at least one out-link.
    addP = ones(maxNode)
    rowIndex = []
    colIndex = []
    data = []

    for line in lines[1:]:
        if not line.strip():
            continue  # tolerate blank lines
        fields = line.split(':')
        nodeId = int(fields[0])
        # split() without an argument ignores repeated/trailing blanks,
        # which split(' ') would turn into empty, unparsable tokens.
        listing = fields[1].split()
        targets = listing[1:]
        if not targets:
            # Listed but without links: still a dangling node. Clearing
            # its flag would make its rank mass vanish from the iteration.
            continue
        outNum = int(listing[0])
        addP[nodeId - 1] = 0
        for node in targets:
            rowIndex.append(int(node) - 1)
            colIndex.append(nodeId - 1)
            data.append(1.0 / outNum)

    smatrixA = sp.csc_matrix(
        (array(data), (array(rowIndex, dtype=int), array(colIndex, dtype=int))),
        shape=(maxNode, maxNode))
    return smatrixA, addP


def pageRank(smatrixA, addP, d, e):
    """Iterate the rank vector until two successive vectors are within e.

    Args:
        smatrixA: sparse link matrix as built by ``parseGraph``.
        addP: dangling-node indicator vector as built by ``parseGraph``.
        d: damping factor (float).
        e: convergence threshold on the Euclidean distance between two
            successive rank vectors.

    Returns:
        The final rank vector as a 1-D numpy array.
    """
    maxNode = addP.shape[0]
    smoothP = ones(maxNode) * (1 - d)
    # p1 starts far from p2 so that the loop body runs at least once for
    # any threshold below their initial distance.
    p1 = ones(maxNode) * 100
    p2 = ones(maxNode)

    while distance.euclidean(p1, p2) > e:
        p1 = p2
        # Total rank currently held by dangling nodes.
        add = dot(p1, addP)
        # Each node receives the dangling total minus its own share (if it
        # is dangling itself), spread over the other maxNode-1 nodes.
        p2 = smoothP + d * (
            smatrixA.dot(p1) + (add * ones(maxNode) - addP * p1) / (maxNode - 1))
    return p2


def formatScores(scores):
    """Render scores as ``nodeId:score`` lines (1-based ids, 6 decimals max)."""
    # join() builds the text in one pass instead of quadratic '+=' growth.
    return ''.join(
        str(nodeId + 1) + ':' + str(float('%.6f' % score)) + '\n'
        for nodeId, score in enumerate(scores))


def main():
    """Read the graph named on the command line, rank it, write the result."""
    parsed = receiveArguments(sys.argv[1:])
    if parsed is None:
        # receiveArguments has already printed what was wrong; stop here
        # instead of failing on the tuple unpacking below.
        sys.exit(2)
    d, e, outfile, infile = parsed
    d = float(d)
    e = float(e)

    with open(infile, 'r') as pf:
        fileList = pf.read().splitlines()

    smatrixA, addP = parseGraph(fileList)
    del fileList  # release the raw text before iterating

    p2 = pageRank(smatrixA, addP, d, e)
    del smatrixA

    with open(outfile, 'w') as pf:
        pf.write(formatScores(p2))


if __name__ == '__main__':
    main()



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值