The Put API that the HBase client provides is only practical for writing small amounts of data. For large volumes (100,000 rows or more), a MapReduce job is far more convenient: in a single-machine test, importing 100,000 rows into HBase took under one second.
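For contrast, here is a minimal sketch of a single-row write through the plain client API (assuming the HBase 1.x+ client API and the same students table with an info column family used below; the row key and values are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class SinglePutExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "127.0.0.1");
        // One RPC round trip per Put: fine for a handful of rows,
        // too slow for bulk imports.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("students"))) {
            Put put = new Put(Bytes.toBytes("1_20240101000000000")); // placeholder row key
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("Tom"));
            table.put(put);
        }
    }
}

The full MapReduce import job follows: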
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Imports "::"-delimited text files from HDFS into an HBase table
 * via MapReduce and TableOutputFormat.
 */
public class HbaseBatchImport {

    static class BatchMapper extends Mapper<LongWritable, Text, Text, Text> {
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMddHHmmssSSS");
        Text text = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            final String[] fields = value.toString().split("::");
            // Append a timestamp to the id so that row keys stay unique.
            String dateFormat = simpleDateFormat.format(new Date());
            final String rowKey = fields[0] + "_" + dateFormat;
            text.set(rowKey);
            context.write(text, value);
        }
    }

    static class BatchReducer extends TableReducer<Text, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text tx : values) {
                final String[] fields = tx.toString().split("::");
                // Text.getBytes() can return a buffer longer than the actual
                // contents, so convert via toString() to get only the valid bytes.
                Put put = new Put(Bytes.toBytes(key.toString()));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("address"), Bytes.toBytes(fields[2]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(fields[3]));
                context.write(NullWritable.get(), put);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        final Configuration configuration = new Configuration();
        // ZooKeeper quorum used by the HBase client
        configuration.set("hbase.zookeeper.quorum", "127.0.0.1");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        // Target HBase table
        configuration.set(TableOutputFormat.OUTPUT_TABLE, "students");
        // Raise this value so HBase does not time out and abort on large writes
        configuration.set("dfs.socket.timeout", "180000");

        final Job job = Job.getInstance(configuration, "HBaseBatchImport");
        // Number of reduce tasks
        job.setNumReduceTasks(3);

        job.setMapperClass(BatchMapper.class);
        job.setReducerClass(BatchReducer.class);

        // Set the map output types; the reduce output types are
        // determined by TableOutputFormat.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        // No output path is set; TableOutputFormat writes directly to HBase.
        job.setOutputFormatClass(TableOutputFormat.class);

        // Input path on HDFS
        FileInputFormat.setInputPaths(job, "hdfs://MacBook-Pro.local:9000/input");

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
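A note on running this. Judging by the split on "::", the input files under /input are assumed to hold one record per line in the form id::name::address::age. The target table must exist before the job starts; it can be created from the HBase shell:

create 'students', 'info'

The job can then be packaged and submitted with hadoop jar (the jar name here is hypothetical); the HBase client jars also need to be on the job classpath, for example via HADOOP_CLASSPATH:

export HADOOP_CLASSPATH=$(hbase classpath)
hadoop jar hbase-batch-import.jar HbaseBatchImport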