常用图算法实现--Hadoop

常用图算法在Hadoop上的实现PageRank数据准备边:1 21 152 32 42 52 62 73 134 25 115 126 16 76 87 17 88 18 98 109 149 110 110 1311 1211 112 113 1414 1215 1网页:1 22 53 1 4 15 26 37...
摘要由CSDN通过智能技术生成

常用图算法在Hadoop上的实现

PageRank

数据准备

边:

1 2
1 15
2 3
2 4
2 5
2 6
2 7
3 13
4 2
5 11
5 12
6 1
6 7
6 8
7 1
7 8
8 1
8 9
8 10
9 14
9 1
10 1
10 13
11 12
11 1
12 1
13 14
14 12
15 1

网页:

1 2
2 5
3 1 
4 1
5 2
6 3
7 2
8 3
9 2
10 2
11 2
12 1
13 1
14 1
15 1

将这两个文件放入HDFS:

hdfs dfs -mkdir input/PageRank
hdfs dfs -put links.txt input/PageRank
hdfs dfs -put pagesHadoop.txt input/PageRank

编写程序

PageRank

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URISyntaxException;

import static java.lang.StrictMath.abs;


public class PageRank {
   

    private static final String CACHED_PATH = "output/cache";
    private static final String ACTUAL_PATH = "output/Graph/HadoopPageRank";
    public static final int maxIterations = 500;
    public static final double threshold = 0.0001;
    public static final double dumping = 0.85;
    public static int pageNum = 0;

    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException, URISyntaxException {
   

        Configuration conf = new Configuration();
        String[] otherArgs = (new GenericOptionsParser(conf, args)).getRemainingArgs();
        if (otherArgs.length != 3) {
   
            System.err.println("Usage: PageRank <PagePath> <LinksPath> <PageNum>");
            System.exit(2);
        }

        int code = 0;

        Path PagePath = new Path(otherArgs[0]);
        Path LinksPath = new Path(otherArgs[1]);
        pageNum = Integer.parseInt(otherArgs[2]);

        conf.set("pageNum", pageNum + "");
        conf.set("dumping", dumping + "");


        Path cachePath = new Path(CACHED_PATH);
        Path actualPath = new Path(ACTUAL_PATH);

        // Delete output if exists
        FileSystem hdfs = FileSystem.get(conf);
        if (hdfs.exists(actualPath))
            hdfs.delete(actualPath, true); // recursive delete

        // prepare original rank
        for (int i = 1; i <= pageNum; i++)
            writeFileByline(ACTUAL_PATH + "/part-r-00000", i + " " + 1.0 / pageNum);


        int counter = 0;
        boolean changed = true;

        while (counter < maxIterations && changed) {
   

            // Delete output if exists
            if (hdfs.exists(cachePath))
                hdfs.delete(cachePath, true);
            //moving the previous iteration file to the cache directory
            hdfs.rename(actualPath, cachePath);

            conf.set("mapreduce.output.textoutputformat.separator", " ");
            conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");


            Job PageRank = Job.getInstance(conf, "PageRank " + (counter + ""));

            // add cache
            PageRank.addCacheFile(PagePath.toUri());

            PageRank.setJarByClass(PageRankMapper.class);
            FileInputFormat.addInputPath(PageRank, LinksPath);
            // set out put path : output/means
            FileOutputFormat.setOutputPath(PageRank, actualPath);

            PageRank.setMapperClass(PageRankMapper.class);
            PageRank.setInputFormatClass(KeyValueTextInputFormat.class);
            PageRank.setMapOutputKeyClass(IntWritable.class);
            PageRank.setMapOutputValueClass(DoubleWritable.class);

            PageRank.setReducerClass(PageRankReducer.class);
            PageRank.setOutputKeyClass(IntWritable.class);
            PageRank.setOutputValueClass(DoubleWritable.class);

            // Execute job
            code = PageRank.waitForCompletion(true) ? 0 : 1;

            //checking if the mean is stable
            BufferedReader file1Reader = new BufferedReader(new InputStreamReader(hdfs.open(new Path(CACHED_PATH + "/part-r-00000"))));
            BufferedReader file2Reader = new BufferedReader(new InputStreamReader(hdfs.open(new Path(ACTUAL_PATH + "/part-r-00000"))));
            for (int i = 0; i < pageNum; i++) {
   
                double rank1 = Double.parseDouble(file1Reader.readLine().split(" ")[1]);
                double rank2 = Double.parseDouble(file2Reader.readLine().split(" ")[1]);

                if (abs(rank1 - rank2) <= threshold) {
   
                    changed = false;
                } else {
   
                    changed = true;
                    break;
                }
            }
            file1Reader.close();
            file2Reader.close();
            counter++;
            System.out.println("PageRank finished iteration:>> " + counter + " || rank change: " + changed);

        }

        System.exit(code);

    }


    public static void writeFileByline(String dst, String contents) throws IOException {
   
        Configuration conf = new Configuration();
        Path dstPath = new Path(dst);
        FileSystem fs = dstPath.getFileSystem(conf);
        FSDataOutputStream outputStream = null;

        if (
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值