/**
 * @author YangXin
 * Mapper that groups records by a field.
 */
package unitTwelve;
import java.io.IOException;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class ByKeyMapper extends Mapper<LongWritable, Text, Text, Text>{
private Pattern splitter = Pattern.compile("\t");
private int selectedField = 1;
private int groupByField = 0;
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
String[] fields = splitter.split(value.toString());
if(fields.length - 1 < selectedField || fields.length - 1 < groupByField){
context.getCounter("Map", "LinesWithErrors").increment(1);
return;
}
String oKey = fields[groupByField];
String oValue = fields[selectedField];
context.write(new Text(oKey), new Text(oValue));
}
}
Mapper that groups records by a field.
Latest recommended article published 2023-01-08 07:18:06.