Hadoop old API (based on a word count example)

Mapper class

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * In Hadoop 1.x the package is generally mapreduce;
 * in Hadoop 0.x it is generally mapred.
 */
public class OldMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, LongWritable>{
	/**
	 * New API: extends Mapper
	 * Old API: extends MapReduceBase implements Mapper
	 */
	@Override
	public void map(LongWritable key1, Text value1, OutputCollector<Text, LongWritable> output, Reporter reporter)
			throws IOException {
		String[] splited = value1.toString().split("\t");
		for (String string : splited) {
			// The new API writes output through Context; the old API uses OutputCollector
			output.collect(new Text(string), new LongWritable(1));
		}
	}
	
}
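
For comparison, here is a minimal sketch of the equivalent Mapper in the new (org.apache.hadoop.mapreduce) API; the class name NewMapper is just illustrative:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class NewMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String[] splited = value.toString().split("\t");
		for (String string : splited) {
			// The new API writes through Context instead of OutputCollector
			context.write(new Text(string), new LongWritable(1));
		}
	}
}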

Reducer class

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class OldReduce extends MapReduceBase implements Reducer<Text, LongWritable, Text, LongWritable>{

	@Override
	public void reduce(Text key2, Iterator<LongWritable> values2, OutputCollector<Text, LongWritable> output,
			Reporter reporter) throws IOException {
		long times = 0L;
		while(values2.hasNext()){
			long temp = values2.next().get();
			times += temp;
		}
		output.collect(key2, new LongWritable(times));
	}

}
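
The new-API counterpart of this Reducer, as a minimal sketch (class name NewReduce is illustrative); note that it receives an Iterable rather than an Iterator:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class NewReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
	@Override
	protected void reduce(Text key, Iterable<LongWritable> values, Context context)
			throws IOException, InterruptedException {
		long times = 0L;
		for (LongWritable value : values) {
			times += value.get();
		}
		// The new API writes through Context here as well
		context.write(key, new LongWritable(times));
	}
}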




Test class:
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.HashPartitioner;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;


/**
 * The classes here come from the mapred package rather than mapreduce.
 */
public class OldApiTest {
	private static final String INPUT_PATH = "hdfs://xxc:9000/input";
	private static final String OUT_PATH = "hdfs://xxc:9000/out";

	public static void main(String[] args) throws IOException, URISyntaxException {
		Configuration conf = new Configuration();
		
		FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
		Path outPath = new Path(OUT_PATH);
		if(fileSystem.exists(outPath)){
			fileSystem.delete(outPath, true);
		}
		
		
		// This differs from the new API: JobConf instead of Job, and the constructor parameters differ too (in the new API the second argument is a String job name)
		JobConf job = new JobConf(conf, OldApiTest.class);
		
		FileInputFormat.setInputPaths(job, INPUT_PATH);
		//job.setInputFormat(TextInputFormat.class);  // optional: TextInputFormat (from mapred) is the default
		
		job.setMapperClass(OldMapper.class);
		//job.setMapOutputKeyClass(Text.class);
		//job.setMapOutputValueClass(LongWritable.class);
		
		job.setPartitionerClass(HashPartitioner.class);
		job.setNumReduceTasks(1);
		
		job.setReducerClass(OldReduce.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);
		
		FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
		//job.setOutputFormat(TextOutputFormat.class);  // in the old API this takes org.apache.hadoop.mapred.TextOutputFormat, which is the default, so the call can stay commented out
		
		// Submission at the end is no longer job.waitForCompletion(true) as in the new API
		JobClient.runJob(job);
	}
}
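
For comparison, a minimal sketch of the same driver written against the new API (Hadoop 1.x style; NewMapper and NewReduce are the sketches above):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NewApiTest {
	private static final String INPUT_PATH = "hdfs://xxc:9000/input";
	private static final String OUT_PATH = "hdfs://xxc:9000/out";

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();

		FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
		Path outPath = new Path(OUT_PATH);
		if (fileSystem.exists(outPath)) {
			fileSystem.delete(outPath, true);
		}

		// The new API uses Job instead of JobConf; the second argument is the job name
		Job job = new Job(conf, NewApiTest.class.getSimpleName());
		job.setJarByClass(NewApiTest.class);

		FileInputFormat.setInputPaths(job, INPUT_PATH);

		job.setMapperClass(NewMapper.class);
		job.setReducerClass(NewReduce.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));

		// The new API submits with waitForCompletion instead of JobClient.runJob
		job.waitForCompletion(true);
	}
}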

