在Eclipse中本地运行MapReduce

最新推荐文章于 2020-12-12 20:23:52 发布

学习中....

最新推荐文章于 2020-12-12 20:23:52 发布

阅读量366

点赞数 1

文章标签：本地运行Mapreduce

本文链接：https://blog.csdn.net/qq_36055407/article/details/97620520

版权

1.首先创建一个maven项目，pom.xml中的配置信息如下：

<!-- Maven build for the local MapReduce word-count example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>hadoop</groupId>
	<artifactId>hadoop001</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<properties>
		<!-- Compile sources as UTF-8 regardless of the platform default charset;
		     the Java sources contain non-ASCII comments. -->
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<!-- JVM argument picked up by the test runner. -->
		<argLine>-Dfile.encoding=UTF-8</argLine>
	</properties>
	<dependencies>
		<!-- Unit testing, test scope only. -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
			<scope>test</scope>
		</dependency>
		<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
		<!-- Hadoop client libraries; provides the org.apache.hadoop.* classes used by the job. -->
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>2.7.3</version>
		</dependency>
	</dependencies>
	<build>
		<plugins>
			<!-- Compile for Java 8. -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.1</version>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
		</plugins>
	</build>
</project>

其中比较核心的配置为：

<!-- Excerpt of the <dependencies> section: JUnit 4.12, test scope only. -->
<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
			<scope>test</scope>
		</dependency>
		<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
		<!-- hadoop-client 2.7.3: the Hadoop dependency the MapReduce code below compiles against. -->
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>2.7.3</version>
		</dependency>

2.创建一个MapReduce作业的驱动程序（以单词统计为例）

大概步骤：首先这个类需要继承Configured类和实现Tool接口，重写其中的run()方法，在run方法中配置job任务，以内部类的方式写Map和Reduce类。

package day20190728;

import java.io.IOException;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;

/**
 * Word-count MapReduce driver intended for running against local test data.
 *
 * <p>The driver extends {@link Configured} and implements {@link Tool}, so the
 * caller supplies the Hadoop configuration through {@code setConf} and starts
 * the job through {@link #run(String[])}. The mapper and reducer are static
 * nested classes of this driver.
 */
public class Test02 extends Configured implements Tool  {

	/**
	 * Configures the word-count job and blocks until it finishes.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
	 * @return 0 if the job succeeded, 1 if it failed, -1 if the paths were not supplied
	 * @throws Exception if the job cannot be created, submitted or monitored
	 */
	@Override
	public int run(String[] args) throws Exception {
		// Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
		if (args == null || args.length < 2) {
			System.err.println("Usage: Test02 <input path> <output path>");
			return -1;
		}

		Job job = Job.getInstance(getConf());
		job.setJarByClass(getClass());

		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		job.setMapperClass(MyMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);

		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		return job.waitForCompletion(true) ? 0 : 1;
	}

	/** Emits {@code (word, 1)} for every whitespace-separated token of an input line. */
	static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

		/** Constant count written for each occurrence of a word. */
		private static final LongWritable ONE = new LongWritable(1);

		/** Reused output key; avoids allocating a new Text per token. */
		private final Text word = new Text();

		/**
		 * Splits one input line into words and writes each word with a count of 1.
		 *
		 * @param key     input key supplied by the input format (not used)
		 * @param value   one line of input text
		 * @param context sink for the {@code (word, 1)} pairs
		 */
		@Override
		public void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// Split on runs of whitespace so repeated separators do not yield empty tokens.
			for (String token : value.toString().split("\\s+")) {
				// A blank line or leading whitespace still produces one empty token; skip it
				// so the empty string is never counted as a word.
				if (token.isEmpty()) {
					continue;
				}
				word.set(token);
				context.write(word, ONE);
			}
		}
	}

	/** Sums the counts received for each word and emits {@code (word, total)}. */
	static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

		/** Reused output value; avoids allocating a new LongWritable per key. */
		private final LongWritable total = new LongWritable();

		/**
		 * Adds up all counts for one word.
		 *
		 * @param text    the word
		 * @param value   the counts emitted for that word
		 * @param content sink for the {@code (word, total)} pair
		 */
		@Override
		public void reduce(Text text, Iterable<LongWritable> value, Context content)
				throws IOException, InterruptedException {
			// Sum the actual values rather than counting elements, so the result stays
			// correct even when an incoming value is a partial sum greater than 1.
			long sum = 0;
			for (LongWritable v : value) {
				sum += v.get();
			}
			total.set(sum);
			content.write(text, total);
		}

	}
}

然后再创建一个带有main方法的类来运行该作业（也可以直接在上面的驱动类Test02中写一个main方法）：

package day20190728;



import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Launcher that runs the {@link Test02} word-count job in local mode
 * against the local file system.
 */
public class Test01 {

	/** Input file used when no input path is passed on the command line. */
	private static final String DEFAULT_INPUT = "D:\\JavaWorkSpace\\hadoop001\\src\\main\\java\\word.txt";

	/** Output directory used when no output path is passed on the command line. */
	private static final String DEFAULT_OUTPUT = "output";

	/**
	 * Runs the word-count job locally and prints its exit code.
	 *
	 * @param args optional: {@code args[0]} input path, {@code args[1]} output path;
	 *             missing entries fall back to the defaults above
	 * @throws Exception if the file system cannot be accessed or the job fails to run
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// Use the local file system and the local job runner.
		conf.set("fs.defaultFS", "file:///");
		conf.set("mapreduce.framework.name", "local");
		// Small sort buffer; enough for a tiny local test input.
		conf.set("mapreduce.task.io.sort.mb", "1");

		Path input = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);
		Path output = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT);

		FileSystem fs = FileSystem.getLocal(conf);
		try {
			// Remove output left over from a previous run before starting the job.
			fs.delete(output, true);

			Test02 driver = new Test02();
			driver.setConf(conf);
			int exitcode = driver.run(new String[]{input.toString(), output.toString()});
			System.out.println(exitcode);
		} finally {
			// Close the file system even when the job throws.
			IOUtils.closeStream(fs);
		}
	}
}

在Windows上运行时可能会报错，此时只需要把下面地址中下载到的文件（如 hadoop.dll、winutils.exe）添加到C盘的 Windows/System32 目录下即可。

下载地址为：https://github.com/SweetInk/hadoop-common-2.7.1-bin