Mapper class
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
/**
 * In Hadoop 1.x the (new-API) classes generally live in the mapreduce package;
 * in Hadoop 0.x the (old-API) classes generally live in the mapred package.
 */
public class OldMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, LongWritable> {
    /**
     * New API: extends Mapper
     * Old API: extends MapReduceBase implements Mapper
     */
    @Override
    public void map(LongWritable key1, Text value1, OutputCollector<Text, LongWritable> output, Reporter reporter)
            throws IOException {
        String[] splited = value1.toString().split("\t");
        for (String string : splited) {
            // The new API writes through a Context; the old API uses an OutputCollector.
            output.collect(new Text(string), new LongWritable(1));
        }
    }
}
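For contrast, here is a minimal sketch of the same mapper written against the new org.apache.hadoop.mapreduce API (the class name NewMapper is illustrative, not from the original code): the class extends Mapper directly and writes through the Context.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// New-API sketch: extend Mapper directly; output goes through the Context.
public class NewMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String word : value.toString().split("\t")) {
            context.write(new Text(word), new LongWritable(1));
        }
    }
}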
Reducer class
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class OldReduce extends MapReduceBase implements Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    public void reduce(Text key2, Iterator<LongWritable> values2, OutputCollector<Text, LongWritable> output,
            Reporter reporter) throws IOException {
        long times = 0L;
        while (values2.hasNext()) {
            times += values2.next().get();
        }
        output.collect(key2, new LongWritable(times));
    }
}
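The reducer differs in the same way under the new API. A minimal sketch (NewReduce is an illustrative name): values arrive as an Iterable rather than an Iterator, and output again goes through the Context.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// New-API sketch: values become an Iterable, so a for-each loop replaces hasNext()/next().
public class NewReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long times = 0L;
        for (LongWritable value : values) {
            times += value.get();
        }
        context.write(key, new LongWritable(times));
    }
}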
Driver class
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.HashPartitioner;
/**
 * In the old API the package name is mapred rather than mapreduce.
 * Note: the format classes must also come from mapred (not mapreduce.lib),
 * otherwise setInputFormat/setOutputFormat will not compile.
 */
public class OldApiTest {
    private static final String INPUT_PATH = "hdfs://xxc:9000/input";
    private static final String OUT_PATH = "hdfs://xxc:9000/out";

    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        Path outPath = new Path(OUT_PATH);
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }
        // This changes too: the driver uses JobConf rather than Job, and the constructor
        // takes the driver class; in the new API the second argument is a String job name.
        JobConf job = new JobConf(conf, OldApiTest.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        // Old API: pass the mapred versions of the format classes.
        job.setInputFormat(TextInputFormat.class);
        job.setMapperClass(OldMapper.class);
        // Not needed here: the map output types match the job output types below.
        //job.setMapOutputKeyClass(Text.class);
        //job.setMapOutputValueClass(LongWritable.class);
        job.setPartitionerClass(HashPartitioner.class);
        job.setNumReduceTasks(1);
        job.setReducerClass(OldReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
        job.setOutputFormat(TextOutputFormat.class);
        // Submission is no longer job.waitForCompletion(true); the old API uses JobClient.
        JobClient.runJob(job);
    }
}
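For reference, a minimal sketch of the same driver against the new API, assuming the NewMapper and NewReduce classes sketched above; the Job constructor usage matches the Hadoop 1.x era of this post (later versions prefer Job.getInstance).

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NewApiTest {
    private static final String INPUT_PATH = "hdfs://xxc:9000/input";
    private static final String OUT_PATH = "hdfs://xxc:9000/out";

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        Path outPath = new Path(OUT_PATH);
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }
        // New API: Job instead of JobConf; the second constructor argument is a String job name.
        Job job = new Job(conf, NewApiTest.class.getSimpleName());
        job.setJarByClass(NewApiTest.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        job.setMapperClass(NewMapper.class);
        job.setReducerClass(NewReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, outPath);
        // New API submits with waitForCompletion instead of JobClient.runJob.
        job.waitForCompletion(true);
    }
}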