/**
 * @author YangXin
 * Mapper that groups records by a field.
 */
package unitTwelve;
import java.io.IOException;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class ByKeyMapper extends Mapper<LongWritable, Text, Text, Text>{
private Pattern splitter = Pattern.compile("\t");
private int selectedField = 1;
private int groupByField = 0;
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
String[] fields = splitter.split(value.toString());
if(fields.length - 1 < selectedField || fields.length - 1 < groupByField){
context.getCounter("Map", "LinesWithErrors").increment(1);
return;
}
String oKey = fields[groupByField];
String oValue = fields[selectedField];
context.write(new Text(oKey), new Text(oValue));
}
}
Mapper that groups records by a field.
Latest recommended article published 2023-01-08 07:18:06.