Do a word count over the row keys and cell values of the HBase table userInfo2 and write the result to HDFS. The output must go into the /0711 directory, split into subdirectories by the first character of each word.
Create the table
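The table creation step is not shown in the source; below is a minimal sketch using the HBase 1.x client API. The column family name "info" and the class name CreateUserInfo2 are assumptions, not taken from the original post.

package com.hadoop.mapreduce.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateUserInfo2 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.192.137");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            // "info" is an assumed column family name; the source does not show it.
            HTableDescriptor table = new HTableDescriptor(TableName.valueOf("userInfo2"));
            table.addFamily(new HColumnDescriptor("info"));
            admin.createTable(table);
        }
    }
}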
Data
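The actual rows are likewise not shown in the source; this is a hedged sketch that loads a couple of hypothetical rows so the job has something to count. The row keys, qualifier, and values here are invented for illustration.

package com.hadoop.mapreduce.test;

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class LoadUserInfo2 {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.192.137");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("userInfo2"))) {
            // Hypothetical rows; the original data is not recoverable from the source.
            Put p1 = new Put(Bytes.toBytes("hello"));
            p1.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("world"));
            Put p2 = new Put(Bytes.toBytes("hadoop"));
            p2.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("hbase"));
            table.put(Arrays.asList(p1, p2));
        }
    }
}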
MapReduce program
Mapper
package com.hadoop.mapreduce.test.map;

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class WordCountHBaseTMapper extends TableMapper<Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // Emit the row key and the cell value of every cell with a count of 1.
        // Note: the row key is emitted once per cell, so a row with several
        // cells contributes its key several times.
        for (Cell cell : value.rawCells()) {
            String rowKey = Bytes.toString(CellUtil.cloneRow(cell));
            String cellValue = Bytes.toString(CellUtil.cloneValue(cell));
            context.write(new Text(rowKey), ONE);
            context.write(new Text(cellValue), ONE);
        }
    }
}
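Note that TableMapper<Text, IntWritable> declares only the mapper's output types; HBase fixes the input pair to ImmutableBytesWritable (the row key) and Result (the row's cells).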
Reducer
package com.hadoop.mapreduce.test.reduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    private MultipleOutputs<Text, IntWritable> mos;
    private final IntWritable result = new IntWritable(0);

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        mos = new MultipleOutputs<Text, IntWritable>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the counts for this word.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        result.set(sum);
        // Route the record by its first character: words starting with a
        // lowercase letter go to /0711/<letter>/, everything else to /0711/otherFile.
        char firstChar = key.toString().charAt(0);
        if (firstChar >= 'a' && firstChar <= 'z') {
            mos.write(key, result, "/0711/" + firstChar + "/");
        } else {
            mos.write(key, result, "/0711/otherFile");
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}
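Two details about the MultipleOutputs calls above are worth noting. First, the baseOutputPath gets the standard part suffix appended, so "/0711/a/" yields a file such as /0711/a/-r-00000 and "/0711/otherFile" yields /0711/otherFile-r-00000. Second, because the path begins with "/", it is resolved as an absolute HDFS path rather than relative to the job's output directory, which is how the counts land under /0711 regardless of the path passed in args[0]. Since nothing is written through context.write(), the regular part-r-* files in the job output directory stay empty; calling LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class) in the driver would suppress those empty files.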
Driver
package com.hadoop.mapreduce.test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import com.hadoop.mapreduce.test.map.WordCountHBaseTMapper;
import com.hadoop.mapreduce.test.reduce.WordCountReduce;

public class WordCountHBaseT {

    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.192.137");

        Job job = Job.getInstance(conf, "WordCountHBaseT");
        job.setJarByClass(WordCountHBaseT.class);

        // Scan the whole userInfo2 table; the scan can be narrowed here if needed.
        Scan s = new Scan();
        TableMapReduceUtil.initTableMapperJob("userInfo2", s,
                WordCountHBaseTMapper.class, Text.class, IntWritable.class, job);

        job.setReducerClass(WordCountReduce.class);
        // Job output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(args[0]));

        // Named outputs for MultipleOutputs; the reducer only uses the
        // baseOutputPath form of write(), so these registrations go unused.
        MultipleOutputs.addNamedOutput(job, "MOSInt",
                TextOutputFormat.class, Text.class, IntWritable.class);
        MultipleOutputs.addNamedOutput(job, "MOSText",
                TextOutputFormat.class, Text.class, Text.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
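Assuming the classes are packaged into a jar (the jar name and output path below are hypothetical), the job takes the output directory as its single argument:

hadoop jar wordcount-hbase.jar com.hadoop.mapreduce.test.WordCountHBaseT /user/hadoop/wc-out

args[0] only sets the job's default output directory; the word counts themselves end up under /0711 because of the absolute base paths in the reducer.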
Result:
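The per-letter directories can be listed with hadoop fs -ls -R /0711; each output file holds tab-separated word/count pairs, TextOutputFormat's default layout.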
Reposted from: https://blog.51cto.com/cdelliqi/1439005