查看io录入格式:
System.out.println(Arrays.toString(strs));
if (null != strs && strs.length > 0 && Arrays.toString(strs).length() > 2) {
//正确数据
context.getCounter("数据行信息", "有效行数").increment(1);
for (String s : strs) {
outkey.set(s);
//coutext.write -->buffer -->小文件 -->大文件
context.write(outkey, outval);
}
}else {
context.getCounter("数据行信息", "无效行数").increment(1);
}
2. 去除 [] 后,只保留内容长度大于 0 的数据行(长度为 0 的记为无效行):
if(null != strs && strs.length > 0 && Arrays.toString(strs).replace("[", "").replace("]", "").length() > 0){
// 正确数据
context.getCounter("数据行信息","有效行数").increment(1);
for (String s : strs) {
outkey.set(s);
// context.write --> buffer --> 小文件 --> 大文件合并
context.write(outkey, outval);
}
}else{
context.getCounter("数据行信息", "无效行数").increment(1);
}
完整map源码:
/**
 * Mapper that decodes each input value as GBK text, splits it on tab characters and emits
 * {@code (field, 1)} for every field of a line that has content.
 *
 * <p>Three counters in the {@code 数据行信息} group are maintained: {@code 总行数} for every
 * record seen, {@code 有效行数} for records that produced output and {@code 无效行数} for
 * records without content.
 */
private static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /** Charset name used to decode the raw bytes of each input value. */
    private static final String INPUT_CHARSET = "GBK";
    /** Counter group shared by the total / valid / invalid line counters. */
    private static final String COUNTER_GROUP = "数据行信息";

    // Output key/value holders, reused for every record instead of allocating new ones.
    private final Text outkey = new Text();
    private final IntWritable outval = new IntWritable(1);

    /**
     * Processes one input record.
     *
     * @param key     key supplied by the framework for this record; not used here
     * @param value   raw record bytes, decoded as GBK
     * @param context used to update counters and to emit output pairs
     * @throws IOException          if the charset is unsupported or writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Decode only the first getLength() bytes of the backing array.
        String line = new String(value.getBytes(), 0, value.getLength(), INPUT_CHARSET);
        String[] fields = line.split("\t");

        context.getCounter(COUNTER_GROUP, "总行数").increment(1);

        // Debug aid: print the split fields so the input format can be inspected.
        System.out.println(Arrays.toString(fields));

        if (hasContent(fields)) {
            context.getCounter(COUNTER_GROUP, "有效行数").increment(1);
            for (String field : fields) {
                outkey.set(field);
                // Author's note on the data flow: context.write --> buffer --> small files --> large file
                context.write(outkey, outval);
            }
        } else {
            context.getCounter(COUNTER_GROUP, "无效行数").increment(1);
        }
    }

    /**
     * Tells whether a split line carries any data.
     *
     * <p>{@code String.split} yields an empty array for a line made only of separators and a
     * single empty string for an empty line; both count as "no content". This is the same
     * decision the earlier {@code Arrays.toString(fields).length() > 2} test made, without
     * building a string for every record.
     *
     * @param fields result of splitting one line; never {@code null}
     * @return {@code true} if there are at least two fields or the only field is non-empty
     */
    private static boolean hasContent(String[] fields) {
        if (fields.length == 0) {
            return false;
        }
        return fields.length > 1 || !fields[0].isEmpty();
    }
}