用MapReduce把hdfs数据写入HBase中

 

2017年03月01日 09:20:50 技术人的突破 阅读数:4117

1.使用Map+Reduce方式

 
  1. public class MapReduceImport {

  2.  
  3. /**

  4. * Mapper

  5. */

  6. static class HMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

  7. Text v2 = new Text();

  8.  
  9. protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {

  10. String[] splited = value.toString().split(" ");

  11. if (splited.length != 6)//清洗不符合标准的数据

  12. return;

  13. try {

  14. //GetRowKey.getRowKeyString方法是自己定义生成rowkey的方法

  15. //rowkey设计为IP_TimeStamp这种方式

  16. v2.set(GetRowKey.getRowKeyString(splited[2], splited[4]) + " " + value.toString());

  17. context.write(key, v2);

  18. } catch (NumberFormatException e) {

  19. System.out.println("出错了" + e.getMessage());

  20. }

  21. }

  22. }

  23.  
  24. /**

  25. * Reducer

  26. */

  27. static class HReducer extends TableReducer<LongWritable, Text, NullWritable> {

  28.  
  29. protected void reduce(LongWritable key, java.lang.Iterable<Text> values, Context context) throws java.io.IOException, InterruptedException {

  30. for (Text text : values) {

  31. String[] splited = text.toString().split(" ");

  32.  
  33. Put put = new Put(Bytes.toBytes(splited[0]));

  34.  
  35. for (int j = 1; j < splited.length; j++) {

  36. put.addColumn(Bytes.toBytes(HConfiguration.colFamily), Bytes.toBytes("log" + j), Bytes.toBytes(splited[j]));

  37. }

  38.  
  39. context.write(NullWritable.get(), put);

  40. }

  41. }

  42. }

  43.  
  44. /**

  45. * Main

  46. *

  47. * @param args

  48. * @throws Exception

  49. */

  50. public static void main(String[] args) throws Exception {

  51.  
  52.  
  53. Configuration configuration = new Configuration();

  54. //设置zookeeper

  55. configuration.set("hbase.zookeeper.quorum", HConfiguration.hbase_zookeeper_quorum);

  56. configuration.set("hbase.zookeeper.property.clientPort", "2181");

  57. //设置hbase表名称

  58. configuration.set(TableOutputFormat.OUTPUT_TABLE, HConfiguration.tableName);

  59. //将该值改大,防止hbase超时退出

  60. configuration.set("dfs.socket.timeout", "180000");

  61.  
  62. MRDriver myDriver = MRDriver.getInstance();

  63.  
  64. try {

  65. myDriver.createTableIfExistDelete(HConfiguration.tableName, HConfiguration.colFamily);

  66. } catch (Exception e) {

  67. e.printStackTrace();

  68. }

  69.  
  70. Job job = new Job(configuration, "Map+ReduceImport");

  71.  
  72. job.setMapperClass(HMapper.class);

  73. job.setReducerClass(HReducer.class);

  74.  
  75. job.setMapOutputKeyClass(LongWritable.class);

  76. job.setMapOutputValueClass(Text.class);

  77.  
  78. job.setInputFormatClass(TextInputFormat.class);

  79. //不再设置输出路径,而是设置输出格式类型TableOutputFormat

  80. job.setOutputFormatClass(TableOutputFormat.class);

  81.  
  82. FileInputFormat.setInputPaths(job, HConfiguration.mapreduce_inputPath);

  83.  
  84. job.waitForCompletion(true);

  85. }

  86. }

  •  

———————— 分割线 ————————

2.只使用Map的方式

 
  1. public class OnlyMapImport {

  2. /**

  3. * Mapper

  4. */

  5. static class ImportMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

  6.  
  7. @Override

  8. public void map(LongWritable offset, Text value, Context context) {

  9. String[] splited = value.toString().split(" ");

  10. if (splited.length != 6)

  11. return;

  12. try {

  13. //GetRowKey.getRowKeyString方法是自己定义生成rowkey的方法

  14. //rowkey设计为IP_TimeStamp这种方式

  15. byte[] rowkey = Bytes.toBytes(GetRowKey.getRowKeyString(splited[2], splited[4]));

  16. Put put = new Put(rowkey);

  17.  
  18. for (int j = 0; j < splited.length; j++) {

  19. put.addColumn(Bytes.toBytes(HConfiguration.colFamily), Bytes.toBytes("log" + j), Bytes.toBytes(splited[j]));

  20. }

  21.  
  22. context.write(new ImmutableBytesWritable(rowkey), put);

  23.  
  24. } catch (NumberFormatException e) {

  25. System.out.println("出错了" + e.getMessage());

  26. } catch (IOException e) {

  27. e.printStackTrace();

  28. } catch (InterruptedException e) {

  29. e.printStackTrace();

  30. }

  31. }

  32. }

  33.  
  34. /**

  35. * Main

  36. *

  37. * @param args

  38. * @throws Exception

  39. */

  40. public static void main(String[] args) throws Exception {

  41.  
  42.  
  43. Configuration configuration = new Configuration();

  44. //设置zookeeper

  45. configuration.set("hbase.zookeeper.quorum", HConfiguration.hbase_zookeeper_quorum);

  46. configuration.set("hbase.zookeeper.property.clientPort", "2181");

  47. //设置hbase表名称

  48. configuration.set(TableOutputFormat.OUTPUT_TABLE, HConfiguration.tableName);

  49. //将该值改大,防止hbase超时退出

  50. configuration.set("dfs.socket.timeout", "180000");

  51.  
  52. MRDriver myDriver = MRDriver.getInstance();

  53.  
  54. try {

  55. myDriver.createTableIfExistDelete(HConfiguration.tableName, HConfiguration.colFamily);

  56. } catch (Exception e) {

  57. e.printStackTrace();

  58. }

  59.  
  60. Job job = new Job(configuration, "HBaseBatchImport");

  61.  
  62. job.setJarByClass(OnlyMapImport.class);

  63. job.setMapperClass(ImportMapper.class);

  64. //设置map的输出,不设置reduce的输出类型

  65. job.setMapOutputKeyClass(ImmutableBytesWritable.class);

  66. job.setMapOutputValueClass(Writeable.class);

  67. job.setNumReduceTasks(0);

  68.  
  69. job.setInputFormatClass(TextInputFormat.class);

  70. //不再设置输出路径,而是设置输出格式类型

  71. job.setOutputFormatClass(TableOutputFormat.class);

  72.  
  73. FileInputFormat.setInputPaths(job, HConfiguration.mapreduce_inputPath);

  74.  
  75. job.waitForCompletion(true);

  76. }

  77. }

  •  

经过测试,只使用Map的方式由于省去了Shuffle和Reduce阶段,导入时间明显减少。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值