由于 map 和 reduce 之间只能以 KEY、VALUE 对的形式传输数据,所有中间结果都必须表示成键值对。
/**
 * Map phase of word count: tokenizes one input line and emits a
 * {@code <word, 1>} pair for every token.
 *
 * @param key     byte offset of the line within the input split (unused)
 * @param value   one line of input text
 * @param context Hadoop context used to emit intermediate key/value pairs
 * @throws IOException          if emitting a pair fails
 * @throws InterruptedException if the task is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // Convert the Hadoop Text payload into a Java String.
    String line = value.toString();
    // Split on runs of whitespace. The previous split(" ") produced
    // empty-string tokens for consecutive spaces, which would have been
    // counted as spurious "words".
    String[] words = line.split("\\s+");
    for (String word : words) {
        // A leading space still yields one empty leading token — skip it.
        if (!word.isEmpty()) {
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
/**
 * Reduce phase of word count: sums all partial counts for a word and
 * emits the final {@code <word, total>} pair.
 *
 * @param key     the word being aggregated
 * @param values  all counts emitted by the mappers for this word
 * @param context Hadoop context used to emit the final key/value pair
 * @throws IOException          if emitting the pair fails
 * @throws InterruptedException if the task is interrupted
 */
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable count : values) {
        sum += count.get();
    }
    // Emit the final aggregated key/value pair.
    context.write(key, new IntWritable(sum));
}
当需要传输复杂数据类型的时候,可以选择传输自定义对象。但由于 Hadoop 对数据传输有序列化的要求,数据必须具备合适的序列化方法,因此自定义对象需要实现 Hadoop 中的 Writable 接口。
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Custom value type that can be shuffled through Hadoop MapReduce.
 * Any user-defined type transferred between map and reduce must implement
 * {@link Writable} and provide symmetric {@code write}/{@code readFields}
 * implementations.
 */
public class MyData implements Writable {
    private String a;
    private int b;

    /**
     * No-arg constructor required by Hadoop: instances are created
     * reflectively before {@link #readFields(DataInput)} is invoked.
     */
    public MyData() {
    }

    public MyData(String a, int b) {
        this.a = a;
        this.b = b;
    }

    public String getA() {
        return a;
    }

    public void setA(String a) {
        this.a = a;
    }

    public int getB() {
        return b;
    }

    public void setB(int b) {
        this.b = b;
    }

    /**
     * Serializes this object. Must write fields in the exact order that
     * {@link #readFields(DataInput)} reads them.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        // writeUTF pairs with readUTF below. The previous writeChars/readLine
        // combination was not symmetric: writeChars emits two bytes per char
        // with no terminator, while readLine consumes single bytes up to a
        // newline, so a round trip would corrupt the String field.
        dataOutput.writeUTF(a);
        dataOutput.writeInt(b);
    }

    /**
     * Deserializes this object, reading fields in the same order they
     * were written.
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.a = dataInput.readUTF();
        this.b = dataInput.readInt();
    }
}