Map class:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
-
输入:
-
key,value = 1,hello java
-
key,value = 2,hello bigdata
-
key,value = 3,hello hadoop
-
key,value = 4,hello spark
-
…
-
输出:
-
hello 1
-
java 1
-
hello 1
-
bigdata 1
-
hello 1
-
hadoop 1
-
…
-
KEYIN map输入的key类型(LongWritable)
-
VALUEIN map输入的value类型(Text)
-
KEYOUT map输出的key类型(Text)
-
VALUEOUT map输出的value类型(LongWritable)
-
Created by Administrator on 2017/9/7 0007.
*/
public class WordCountMap extends Mapper<LongWritable,Text,Text,LongWritable> {/**
-
数据读一行,就调用一次map方法
-
@param key
-
@param value
-
@param context
-
@throws IOException
-
@throws InterruptedException
*/
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
//读取到的一行行的数据,转换成String类型
String line = value.toString();
//将一行行的数据以 空白 切分,然后放入到数组中
//words[0] hello
//words[1] java
String[] words = line.split(",");//切分,记得注意文件格式。
List listStri = new ArrayList(Arrays.asList(words));
for (int i = 0; i <listStri.size() ; i++) {
if (listStri.get(i).equals(" ")){
listStri.remove(i);
}
}for (String word : listStri) {
context.write(new Text(word), new LongWritable(1));
}
}
} -
Reduce class:
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
** 输入:
-
hello 1
-
java 1
-
hello 1
-
bigdata 1
-
hello 1
-
hadoop 1
-
…
-
输出:
-
hello 3
-
java 1
-
bigdata 1
-
hadoop 1
-
…
-
Created by Administrator on 2017/9/7 0007.
*/
public class WordCountReduce extends Reducer<Text,LongWritable,Text,LongWritable> {/**
-
key,value = hello,[1,1,1]
-
输出:
-
key,value = hello,3
-
@param key
-
@param values
-
@param context
-
@throws IOException
-
@throws InterruptedException
*/
@Override
protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException {
long counter = 0;for (LongWritable count: values) {
//累加单词出现的次数
counter = counter + count.get();
}
//key不变,统计value值[1,1,1,1,1,1] = 6
context.write(new Text(key),new LongWritable(counter));
}}
-
Driver (run) class:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
-
Created by Administrator on 2017/9/7 0007.
*/
public class WordCountDriver {
public static void main(String[] args) throws Exception {
//这里是得到一个包含map和reduce信息的job对象
Job job = Job.getInstance(new Configuration());
//设置驱动类,也就运行mapreduce的主类
job.setJarByClass(WordCountDriver.class);
job.setMapperClass(WordCountMap.class);
job.setReducerClass(WordCountReduce.class);//设置map输出类型 //hello 1,java 1 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); //设置reduce输出类型 //job.setOutputKeyClass(Text.class); //job.setOutputValueClass(LongWritable.class); //根据路径读取文件 FileInputFormat.setInputPaths(job,new Path("E://数据//word.txt")); FileOutputFormat.setOutputPath(job,new Path("E://数据//Hadoop//word_out5")); Boolean b = job.waitForCompletion(true); System.out.println(b ? 0 : 1);
}
}
Alternatively, all three classes can be combined into a single source file (the mapper and reducer as static nested classes of the driver).