hadoop开发（1）eclipse写mapreduce的jar包

最新推荐文章于 2020-11-24 15:14:31 发布

吃饭-睡觉-打豆豆

最新推荐文章于 2020-11-24 15:14:31 发布

阅读量384

点赞数 1

分类专栏： mapreduce 文章标签： mapreduce maven hadoop java eclipse

本文链接：https://blog.csdn.net/weixin_45768244/article/details/106394689

版权

mapreduce 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

创建一个名为MapReduce的Maven项目

修改pom文件

	<!-- "provided": needed to compile the job classes, but supplied by the
	     Hadoop installation at run time. The "test" scope would hide it from
	     the main source tree and break a Maven command-line build. -->
	<dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.6.0</version>
      <scope>provided</scope>
    </dependency>

完整pom

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.panzi.cn</groupId>
  <artifactId>MapReduce</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>MapReduce</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <!-- "provided": needed to compile the job classes, but supplied by the
         Hadoop installation at run time. The "test" scope would hide it from
         the main source tree and break a Maven command-line build. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.6.0</version>
      <scope>provided</scope>
    </dependency>
  </dependencies>
</project>

在这里插入图片描述

创建WordCount的java文件

在这里插入图片描述
代码

package com.panzi.cn.MapReduce;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce word count job: reads text input, splits every line into
 * whitespace-separated words and writes one {@code word<TAB>count} record per
 * distinct word.
 *
 * <p>Usage: {@code hadoop jar <jar> [generic options] <input path> <output path>}
 */
public class WordCount {

	/**
	 * Map phase. Input key is unused (typed {@code Object}); input value is one
	 * line of text. Emits {@code (word, 1)} for every non-empty token of the line.
	 */
	public static class MyMapper extends Mapper<Object, Text, Text, IntWritable> {

		// Output objects are reused for every record, as in the stock Hadoop
		// WordCount example, instead of allocating two objects per token.
		private static final IntWritable ONE = new IntWritable(1);
		private final Text word = new Text();

		/**
		 * Tokenizes one input line and emits a count of 1 per token.
		 *
		 * @param key     input key, ignored
		 * @param value   one line of input text
		 * @param context sink for the {@code (word, 1)} pairs
		 */
		@Override
		protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
			// Split on any run of whitespace so that tabs and repeated spaces
			// do not produce bogus tokens.
			for (String token : value.toString().split("\\s+")) {
				// A blank line or leading whitespace yields one empty token; skip it
				// so the empty string is never counted as a word.
				if (token.isEmpty()) {
					continue;
				}
				word.set(token);
				context.write(word, ONE);
			}
		}

	}

	/**
	 * Reduce phase. Sums all counts received for a word and emits
	 * {@code (word, total)}. Because addition is associative and commutative,
	 * the same class is also registered as the combiner.
	 */
	public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

		// Reused output value, see MyMapper.
		private final IntWritable total = new IntWritable();

		/**
		 * Adds up the partial counts of one word.
		 *
		 * @param key     the word
		 * @param values  partial counts for that word
		 * @param context sink for the {@code (word, total)} pair
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values, Context context)
				throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable val : values) {
				sum += val.get();
			}
			total.set(sum);
			context.write(key, total);
		}

	}

	/**
	 * Configures and submits the job, then exits with 0 on success, 1 if the
	 * job failed, or 2 if the command line is incomplete.
	 *
	 * @param args generic Hadoop options followed by the input and output paths
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		// Strip generic options (-D, -files, ...) first: only what remains are
		// the job's own arguments, so the count must be validated afterwards.
		String[] arg = new GenericOptionsParser(conf, args).getRemainingArgs();
		if (arg.length < 2) {
			System.err.println("Usage: WordCount [generic options] <input path> <output path>");
			System.exit(2);
		}

		// Job.getInstance replaces the deprecated Job constructor.
		Job job = Job.getInstance(conf, "hadoop");
		job.setJarByClass(WordCount.class); // locate the jar that contains this class
		job.setMapperClass(MyMapper.class);
		// Pre-aggregate counts on the map side to shrink the shuffle.
		job.setCombinerClass(MyReducer.class);
		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		FileInputFormat.addInputPath(job, new Path(arg[0]));
		FileOutputFormat.setOutputPath(job, new Path(arg[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

打包

右键项目——export——选择java——JAR File——next——
在这里插入图片描述
——next——

点击Finish
上传到虚拟机上的hadoop软件目录下的share/hadoop/mapreduce中

运行

[root@master1 mapreduce]# hadoop jar wordcount.jar /data/ /out2

运行成功
在这里插入图片描述

查看结果

结果保存在：

[root@master1 mapreduce]# hdfs dfs -ls /out2
[root@master1 mapreduce]# hdfs dfs -cat /out2/part-r-00000

在这里插入图片描述
结束！睡觉