MapReduce基本操作 -- 最优路径案例

异世界的猫咪

已于 2023-12-04 19:10:49 修改

阅读量505

点赞数 12

分类专栏： Hadoop 文章标签： mapreduce 大数据

于 2023-12-04 19:09:39 首次发布

本文链接：https://blog.csdn.net/MTYRSQT/article/details/134791586

版权

Hadoop 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

案例描述

根据相关知识内容实现 MapReduce 最优路径的算法。

案例要求

请仔细阅读左侧的相关算法知识和最优路径分析，再根据右侧代码文件中的提示，在 ShortestPathMapper 和 ShortestPathReducer 类的 Begin - End 区域内补充代码，利用 MapReduce 的并行算法计算出 A 节点到其他各节点的最短路径。各类描述如下：

optimal_path.ShortestPathMapper 为 Mapper 阶段，主要是将 A 节点到其他相连节点（包含 A 节点）的距离列举出来，然后传递给 reduce阶段，找到距离最短的； context 中 key 和 value 输出类型都是 Text。
optimal_path.ShortestPathReducer 为 Reduce 阶段，主要是找到所有存在的距离中最短的，并更新记录中的最短距离； context 中 key 和 value 输出类型都是 Text。
optimal_path.RunJob 为 Driver 驱动类；
optimal_path.Node 为 Node 封装对象类。

最优路径概述

最优路径算法是用于计算一个节点到其他所有节点的最短路径。主要特点是以起始点为中心向外层层扩展，直到扩展到终点为止。最优路径算法问题分为单源点最短路径和多源点最短路径:

单源点最短路径：指给定一个确定的源点，计算该点到其他顶点的最短距离;
多源点最短路径：指计算图中所有顶点到其他各顶点的最短路径。

代码实现

1.启动Hadoop集群

start-all.sh

2.编写step1/optimal_path/ShortestPathMapper.java文件

package optimal_path;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import optimal_path.Node;

/**
 * Map phase of the iterative shortest-path job.
 *
 * <p>Every input record is keyed by a node id. On the first pass the value holds only the
 * tab-separated adjacency entries, so the mapper prepends an initial distance: "0" for the
 * source node (key "A" or "1") and "inf" for every other node. On later passes the value
 * already starts with the distance written by the previous reduce.
 *
 * <p>The mapper always re-emits the node's own record so the adjacency list survives the
 * pass, and, if the node has a numeric distance, emits one candidate distance per neighbour.
 */
public class ShortestPathMapper extends Mapper<Text, Text, Text, Text> {
	/** Marker stored instead of a number while a node has not been reached. */
	private static final String UNREACHED = "inf";

	/**
	 * Re-emits the node record and proposes distances for its neighbours.
	 *
	 * @param key     node id
	 * @param value   adjacency entries, preceded by the current distance after the first pass
	 * @param context job context; "run.counter" carries the 1-based pass number
	 * @throws IOException           if writing to the context fails
	 * @throws InterruptedException  if the task is interrupted while writing
	 * @throws NumberFormatException if a distance or edge weight is not an integer
	 */
	protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
		/********** Begin **********/
		int pass = context.getConfiguration().getInt("run.counter", 1);

		String record;
		if (pass == 1) {
			// First pass: seed the default distances, source = 0, everything else = inf.
			String id = key.toString();
			String initial = (id.equals("A") || id.equals("1")) ? "0" : UNREACHED;
			record = initial + "\t" + value.toString();
		} else {
			record = value.toString();
		}
		// Pass the node's own record through so the reducer can keep its adjacency list.
		context.write(key, new Text(record));

		Node node = new Node();
		node.FormatNode(record);
		String current = node.getDistance();
		// FormatNode leaves the distance unset for an empty record; treat that like an
		// unreached node instead of dereferencing null.
		if (current == null || UNREACHED.equals(current)) {
			return;
		}

		int neighbours = node.getNodeNum();
		if (neighbours == 0) {
			return;
		}
		// The node's own distance does not change inside the loop, so parse it once.
		int base = Integer.parseInt(current);
		for (int i = 0; i < neighbours; i++) {
			String target = node.getNodeKey(i);
			int candidate = Integer.parseInt(node.getNodeValue(i)) + base;
			context.write(new Text(target), new Text(String.valueOf(candidate)));
		}
		/********** End **********/
	}
}

3.编写step1/optimal_path/ShortestPathReducer.java文件

package optimal_path;

import java.io.IOException;

import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import optimal_path.Node;
import optimal_path.RunJob.eInf;

/**
 * Reduce phase of the iterative shortest-path job.
 *
 * <p>For one node id the reducer receives the node's own record (distance followed by its
 * tab-separated adjacency entries) together with any candidate distances proposed by the
 * mappers. It keeps the smallest numeric distance, writes the updated record, and bumps
 * {@code eInf.COUNTER} whenever the stored distance improved so the driver runs another pass.
 */
public class ShortestPathReducer extends Reducer<Text, Text, Text, Text> {
	/** Marker stored instead of a number while a node has not been reached. */
	private static final String INF = "inf";

	/**
	 * Picks the minimum distance for a node and writes its updated record.
	 *
	 * @param arg0 node id
	 * @param arg1 the node's record and/or candidate distances for this node
	 * @param arg2 job context used for output and for the improvement counter
	 * @throws IOException           if writing to the context fails
	 * @throws InterruptedException  if the task is interrupted while writing
	 * @throws NumberFormatException if a distance is neither "inf" nor an integer
	 */
	protected void reduce(Text arg0, Iterable<Text> arg1, Context arg2) throws IOException, InterruptedException {
		/********** Begin **********/
		String min = INF;
		Node node = new Node();

		for (Text t : arg1) {
			String line = t.toString();
			String[] fields = StringUtils.split(line, '\t');
			if (fields.length == 0) {
				// Nothing but separators: there is no distance field to read.
				continue;
			}
			// Only the node's own record carries adjacency entries; remember it so the
			// adjacency list is written back out.
			if (fields.length > 1) {
				node.FormatNode(line);
			}
			if (isCloser(fields[0], min)) {
				min = fields[0];
			}
		}

		// A key that never delivered an adjacency record (a sink, or a node that only
		// appears as somebody's neighbour) has no stored distance to compare against and
		// no outgoing edges to propagate through, so it never requests another pass.
		// Counting it would keep the driver looping forever, and dereferencing the
		// missing distance would throw a NullPointerException.
		String previous = node.getDistance();
		if (previous != null && isCloser(min, previous)) {
			arg2.getCounter(eInf.COUNTER).increment(1L);
		}

		node.setDistance(min);
		arg2.write(arg0, new Text(node.toString()));
		/********** End **********/
	}

	/** Returns true when {@code candidate} is a number strictly smaller than {@code current}. */
	private static boolean isCloser(String candidate, String current) {
		if (INF.equals(candidate)) {
			return false;
		}
		return INF.equals(current) || Integer.parseInt(candidate) < Integer.parseInt(current);
	}
}

4.编写step1/optimal_path/RunJob.java文件

package optimal_path;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import optimal_path.ShortestPathMapper;
import optimal_path.ShortestPathReducer;


/**
 * Driver for the iterative shortest-path computation.
 *
 * <p>Runs the map/reduce job repeatedly. Pass 1 reads "/input/"; pass i reads the output of
 * pass i-1 and writes "/output/i". The loop ends when a pass reports zero for
 * {@code eInf.COUNTER}, or when a pass fails.
 */
public class RunJob {

	/** Job counter that the reducer increments when a node's distance improved. */
	static enum eInf {
		COUNTER
	}

	/**
	 * Runs passes until no distance changes any more.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		Configuration conf = new Configuration();
		// HDFS host and port.
		conf.set("fs.defaultFS", "hdfs://localhost:9000");
		try {
			FileSystem fs = FileSystem.get(conf);

			int i = 0;
			long num = 1;
			while (num > 0) {
				i++;
				// Tell the mapper which pass this is; it seeds distances on pass 1 only.
				conf.setInt("run.counter", i);
				Job job = Job.getInstance(conf);
				job.setJarByClass(RunJob.class);
				// Mapper / reducer classes.
				job.setMapperClass(ShortestPathMapper.class);
				job.setReducerClass(ShortestPathReducer.class);
				// Intermediate and final records are both Text/Text.
				job.setMapOutputKeyClass(Text.class);
				job.setMapOutputValueClass(Text.class);
				job.setOutputKeyClass(Text.class);
				job.setOutputValueClass(Text.class);
				// The first tab-separated item of a line is the key, the rest is the value.
				job.setInputFormatClass(KeyValueTextInputFormat.class);

				// Input: the raw graph on the first pass, the previous pass's output afterwards.
				Path inPath = (i == 1) ? new Path("/input/") : new Path("/output/" + (i - 1));
				FileInputFormat.addInputPath(job, inPath);

				// Output: remove a stale directory left over from an earlier run.
				Path outPath = new Path("/output/" + i);
				if (fs.exists(outPath)) {
					fs.delete(outPath, true);
				}
				FileOutputFormat.setOutputPath(job, outPath);

				if (!job.waitForCompletion(true)) {
					// A failed pass has no usable counter or output; stop rather than
					// launching the next pass on top of it.
					System.err.println("Shortest-path pass " + i + " failed; aborting.");
					break;
				}

				// Number of nodes whose distance improved during this pass.
				num = job.getCounters().findCounter(eInf.COUNTER).getValue();
				if (num == 0) {
					System.out.println("共执行了" + i + "次，完成最短路径计算");
				}
			}
		} catch (Exception e) {

			e.printStackTrace();
		}
	}
}

5.编写step1/optimal_path/Node.java文件

package optimal_path;

import org.apache.hadoop.util.StringUtils;
/**
 * Holds the information of one graph node: its current distance and its adjacency entries.
 *
 * <p>A record is a tab-separated string whose first field is the distance and whose
 * remaining fields are adjacency entries of the form {@code (key,weight)}.
 */
public class Node {
	/** Current distance as text; {@code null} until set or parsed. */
	private String distance;
	/** Raw adjacency entries; {@code null} until a record has been parsed. */
	private String[] adjs;

	/** Returns the current distance, or {@code null} if none has been set. */
	public String getDistance() {
		return distance;
	}

	/** Replaces the current distance. */
	public void setDistance(String distance) {
		this.distance = distance;
	}

	/** Extracts the key from an entry: the text between the first character and the first comma. */
	public String getKey(String str) {
		return str.substring(1, str.indexOf(","));
	}

	/** Extracts the value from an entry: the text between the first comma and the first ")". */
	public String getValue(String str) {
		return str.substring(str.indexOf(",") + 1, str.indexOf(")"));
	}

	/** Returns the key of the adjacency entry at index {@code num}. */
	public String getNodeKey(int num) {
		return getKey(adjs[num]);
	}

	/** Returns the value of the adjacency entry at index {@code num}. */
	public String getNodeValue(int num) {
		return getValue(adjs[num]);
	}

	/** Returns the number of adjacency entries; 0 when no record has been parsed yet. */
	public int getNodeNum() {
		return adjs == null ? 0 : adjs.length;
	}

	/**
	 * Parses a record into the distance (first field) and the adjacency entries (the rest).
	 * A {@code null}, empty, or separator-only record leaves the node unchanged.
	 *
	 * @param str tab-separated record
	 */
	public void FormatNode(String str) {
		if (str == null || str.length() == 0) {
			return;
		}
		// String.split drops trailing empty fields, so "inf\t" yields just ["inf"].
		String[] fields = str.split("\t");
		if (fields.length == 0) {
			// The record held nothing but separators.
			return;
		}
		setDistance(fields[0]);
		String[] rest = new String[fields.length - 1];
		System.arraycopy(fields, 1, rest, 0, rest.length);
		this.adjs = rest;
	}

	/** Serializes the node back into the tab-separated record format. */
	@Override
	public String toString() {
		StringBuilder out = new StringBuilder(String.valueOf(this.distance));
		if (this.adjs != null) {
			for (String entry : this.adjs) {
				out.append('\t').append(entry);
			}
		}
		return out.toString();
	}

}