Map class:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
-
输入:
-
key,value = 1,hello java
-
key,value = 2,hello bigdata
-
key,value = 3,hello hadoop
-
key,value = 4,hello spark
-
…
-
输出:
-
hello 1
-
java 1
-
hello 1
-
bigdata 1
-
hello 1
-
hadoop 1
-
…
-
KEYIN map输入的key类型(LongWritable)
-
VALUEIN map输入的value类型(Text)
-
KEYOUT map输出的key类型(Text)
-
VALUEOUT map输出的value类型(LongWritable)
-
Created by Administrator on 2017/9/7 0007.
*/
public class WordCountMap extends Mapper<LongWritable,Text,Text,LongWritable> {/**
-
数据读一行,就调用一次map方法
-
@param key
-
@param value
-
@param context
-
@throws IOException
-
@throws InterruptedException
*/
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//读取到的一行行的数据,转换成String类型
String line = value.toString();
//将一行行的数据以 空白 切分,然后放入到数组中
//words[0] hello
//words[1] java
String[] words = line.split(",");//切分,记得注意文件格式。
List listStri = new ArrayList(Arrays.asList(words));
for (int i = 0; i <listStri.size() ; i++) {
if (listStri.get(i).equals(" ")){
listStri.remove(i);
}
}for (String word : listStri) {
context.write(new Text(word), new LongWritable(1));
}
}
} -
Reduce class:
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
** 输入:
-
hello 1
-
java 1
-
hello 1
-
bigdata 1
-
hello 1
-
hadoop 1
-
…
-
输出:
-
hello 3
-
java 1
-
bigdata 1
-
hadoop 1
-
…
-
Created by Administrator on 2017/9/7 0007.
*/
public class WordCountReduce extends Reducer<Text,LongWritable,Text,LongWritable> {/**
-
key,value = hello,[1,1,1]
-
输出:
-
key,value = hello,3
-
@param key
-
@param values
-
@param context
-
@throws IOException
-
@throws InterruptedException
*/
@Override
protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException {
long counter = 0;for (LongWritable count: values) {
//累加单词出现的次数
counter = counter + count.get();
}
//key不变,统计value值[1,1,1,1,1,1] = 6
context.write(new Text(key),new LongWritable(counter));
}}
-
Driver (run) class:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
-
Created by Administrator on 2017/9/7 0007.
*/
public class WordCountDriver {
public static void main(String[] args) throws Exception {
//这里是得到一个包含map和reduce信息的job对象
Job job = Job.getInstance(new Configuration());
//设置驱动类,也就运行mapreduce的主类
job.setJarByClass(WordCountDriver.class);
job.setMapperClass(WordCountMap.class);
job.setReducerClass(WordCountReduce.class);//设置map输出类型 //hello 1,java 1 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); //设置reduce输出类型 //job.setOutputKeyClass(Text.class); //job.setOutputValueClass(LongWritable.class); //根据路径读取文件 FileInputFormat.setInputPaths(job,new Path("E://数据//word.txt")); FileOutputFormat.setOutputPath(job,new Path("E://数据//Hadoop//word_out5")); Boolean b = job.waitForCompletion(true); System.out.println(b ? 0 : 1);
}
}
Alternatively, all three classes can be combined into a single source file (the mapper and reducer as static nested classes of the driver).