package PageRank;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
public class myPageRank {
public static final int N = 4847571;// 给定图数据顶点数
private static final double AFA = 0.85;//阻尼系数
private static final double DELTA = 0.1;//阈值
private static final double MAX_TIMES = 20;//迭代次数
//
private static final String FILE = "D:/soc-LiveJournal1.txt";
private static final String OUT = "D://result.txt";
//
public static void main(String args[]) throws IOException {
List<List<Integer>> graph = file_to_matrix();
pagerank(graph);
}
public static List<List<Integer>> file_to_matrix() {
List<List<Integer>> graph = new ArrayList<>(N);
for (int i = 0; i < N; i++) {
graph.add(new ArrayList<>());
}
BufferedReader br = null;
try {
br = new BufferedReader(new InputStreamReader(new FileInputStream(FILE)), 65536);// 要是换成二进制呢?
String line;
String[] str;
int num = 0;
long start = System.currentTimeMillis();
long now = 0;
while ((line = br.readLine()) != null) {// 构建邻接表
num++;
str = line.split("\\s");// "\s"
graph.get(Integer.parseInt(str[0])).add(Integer.parseInt(str[1]));
}
now = System.currentTimeMillis() - start;
System.out.println("文件读取结束,共 " + num + "行!" + " 用时 " + now + "ms");
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (br != null)
br.close();
} catch (IOException e) {
br = null;
e.printStackTrace();
} finally {
br = null;
}
}
return graph;
}
// 每个节点PR初始值为1
public static void pagerank(List<List<Integer>> graph) throws IOException {
double[] Prnew = new double[N];
for (int i = 0; i < N; i++) {
Prnew[i] = 1.0;//
}
double[] Pr;
// 迭代至|Pn+1−Pn|<ϵ
long start = System.currentTimeMillis();
long now = 0;
for (int i = 1; i < MAX_TIMES; i++) {
// 保留迭代前的Pr
Pr = Prnew;
// 迭代后
Prnew = get_Prnew(graph, Pr);
double delta = get_DELTA(Prnew, Pr);
now = System.currentTimeMillis() - start;
System.out.println("第" + i + "次迭代完成,DELTA = " + delta + ", 用时 " + now + "ms");
if (delta < DELTA)
break;
}
System.out.println("\n开始计算前十节点...");
double[][] big = getBiggestPr(Prnew);
for (int j = 0; j < 10; j++) {
System.out.println("第" + (j + 1) + "大节点, node: " + (int) big[j][0] + " pr: " + big[j][1]);
}
// 写文件
BufferedWriter bw = null;
try {
bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(OUT)));
double sum = 0;
for (int j = 0; j < N; j++) {
// System.out.println(Prnew[j]);
sum += Prnew[j];
bw.write(String.valueOf(Prnew[j]));
bw.newLine();
}
bw.close();
System.out.println("sum=" + sum);
} catch (Exception e) {
e.printStackTrace();
}
}
public static double[] get_Prnew(List<List<Integer>> list, double[] Pr) {
double[] Prnew = new double[N];
List<Integer> _list;
int _list_size;
for (int i = 0; i < N; i++) {
for (int k = 0; k < N; k++) {
_list = list.get(k);
_list_size = _list.size();
if ((_list.size() == 0) || (_list.size() == 1 && _list.get(0) == k)) {// 出度为0 或者 出度为1且指向自身
Prnew[i] += AFA * (Pr[k] * (1.0 / N));
} else {
for (int j = 0; j < _list_size; j++) {
if (_list.get(j) == i) {// 含有指向i的边
Prnew[i] += Pr[k] * (AFA * (1.0 / _list_size));
break;
}
}
}
}
Prnew[i] += (1 - AFA);
}
return Prnew;
}
public static double get_DELTA(double[] Prnew, double[] Pr) {
double temp = 0;
for (int i = 0; i < N; i++) {
temp += (Prnew[i] - Pr[i]) * (Prnew[i] - Pr[i]);
}
return Math.sqrt(temp);
}
public static double[][] getBiggestPr(double[] Pr) {
double[][] biggestPr = new double[10][2];
for (int i = 0; i < N; i++) {
if (Pr[i] > biggestPr[9][1]) {
biggestPr[9][0] = i;
biggestPr[9][1] = Pr[i];
for (int j = 8; j >= 0; j--) {
if (biggestPr[j + 1][1] > biggestPr[j][1]) {
// 交换
biggestPr[j + 1][0] = biggestPr[j + 1][0] + biggestPr[j][0];
biggestPr[j][0] = biggestPr[j + 1][0] - biggestPr[j][0];
biggestPr[j + 1][0] = biggestPr[j + 1][0] - biggestPr[j][0];
biggestPr[j + 1][1] = biggestPr[j + 1][1] + biggestPr[j][1];
biggestPr[j][1] = biggestPr[j + 1][1] - biggestPr[j][1];
biggestPr[j + 1][1] = biggestPr[j + 1][1] - biggestPr[j][1];
} else {
break;
}
}
}
}
return biggestPr;
}
}
// Java PageRank implementation example
// (scraped page footer: latest recommended article published 2024-08-31 23:41:15)