1. Data on HDFS
1;30;"unemployed";"married";"primary";"no";1787;"no";"no";"cellular";19;"oct";79;1;-1;0;"unknown";"no"
2;33;"services";"married";"secondary";"no";4789;"yes";"yes";"cellular";11;"may";220;1;339;4;"failure";"no"
3;35;"management";"single";"tertiary";"no";1350;"yes";"no";"cellular";16;"apr";185;1;330;1;"failure";"no"
4;30;"management";"married";"tertiary";"no";1476;"yes";"yes";"unknown";3;"jun";199;4;-1;0;"unknown";"no"
5;59;"blue-collar";"married";"secondary";"no";0;"yes";"no";"unknown";5;"may";226;1;-1;0;"unknown";"no"
6;35;"management";"single";"tertiary";"no";747;"no";"no";"cellular";23;"feb";141;2;176;3;"failure";"no"
7;36;"self-employed";"married";"tertiary";"no";307;"yes";"no";"cellular";14;"may";341;1;330;2;"other";"no"
8;39;"technician";"married";"secondary";"no";147;"yes";"no";"cellular";6;"may";151;2;-1;0;"unknown";"no"
9;41;"entrepreneur";"married";"tertiary";"no";221;"yes";"no";"unknown";14;"may";57;2;-1;0;"unknown";"no"
2. Mapper class
package hdfs2hbase;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper: reads semicolon-delimited records from HDFS and emits one HBase
 * Put per record. The first field becomes the row key; the remaining fields
 * are stored as columns c1, c2, ... in the "cf" column family.
 */
public class ImportMapper extends
        Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

    private static final String SPLITTER = ";";
    private static final byte[] FAMILY = Bytes.toBytes("cf");

    private final ImmutableBytesWritable rowkey = new ImmutableBytesWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split with limit -1 so trailing empty fields are preserved.
        String[] words = value.toString().split(SPLITTER, -1);
        // Skip blank lines: an empty row key is not allowed by Put.
        if (words[0].isEmpty()) {
            return;
        }
        rowkey.set(Bytes.toBytes(words[0]));
        Put put = new Put(rowkey.get());
        for (int i = 1; i < words.length; i++) {
            put.addColumn(FAMILY, Bytes.toBytes("c" + i), Bytes.toBytes(words[i]));
        }
        context.write(rowkey, put);
    }
}
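To make the resulting row layout concrete, here is a minimal standalone sketch (a hypothetical SplitDemo class, not part of the job) that applies the same split to the first sample line and prints the row key and columns the mapper would emit:

package hdfs2hbase;

public class SplitDemo {
    public static void main(String[] args) {
        String line = "1;30;\"unemployed\";\"married\";\"primary\";\"no\";1787;"
                + "\"no\";\"no\";\"cellular\";19;\"oct\";79;1;-1;0;\"unknown\";\"no\"";
        // Same split as ImportMapper: limit -1 keeps trailing empty fields,
        // so every record yields the same set of cN columns.
        String[] words = line.split(";", -1);
        System.out.println("rowkey = " + words[0]);
        for (int i = 1; i < words.length; i++) {
            System.out.println("cf:c" + i + " = " + words[i]);
        }
    }
}

Running it prints rowkey = 1 followed by cf:c1 = 30 through cf:c17 = "no", which is exactly the cell layout each Put carries into the table.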
3. Driver class
package hdfs2hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Job driver. The target table must exist before the job runs; create it
 * in the HBase shell with:
 *
 *   create 'test1','cf'
 */
public class ImportToHBase extends Configured implements Tool {

    public static final String TABLE = "test1";

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage:\n ImportToHBase <input>");
            return -1;
        }
        Configuration conf = getConf();
        TableName tableName = TableName.valueOf(TABLE);
        Path inputDir = new Path(args[0]);
        String jobName = "Import to " + tableName.getNameAsString();

        Job job = Job.getInstance(conf, jobName);
        job.setJarByClass(ImportMapper.class);
        FileInputFormat.setInputPaths(job, inputDir);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(ImportMapper.class);
        // Configures TableOutputFormat for the target table; with zero
        // reduce tasks the mapper's Puts are written straight to HBase.
        TableMapReduceUtil.initTableReducerJob(
                tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
        return job.waitForCompletion(true) ? 0 : 1;
    }
    /**
     * Test entry point: overrides any command-line arguments with a
     * hard-coded HDFS input path.
     */
    public static void main(String[] args) throws Exception {
        args = new String[] {
                "/user/root/data.txt"
        };
        int exitCode = ToolRunner.run(getConfiguration(), new ImportToHBase(), args);
        System.exit(exitCode);
    }
    private static Configuration configuration;

    public static Configuration getConfiguration() {
        if (configuration == null) {
            configuration = new Configuration();
            // Allow job submission from a non-cluster (e.g. Windows) client.
            configuration.setBoolean("mapreduce.app-submission.cross-platform", true);
            configuration.set("fs.defaultFS", "hdfs://master:8020");                    // NameNode
            configuration.set("mapreduce.framework.name", "yarn");                      // run on YARN
            configuration.set("yarn.resourcemanager.address", "master:8032");           // ResourceManager
            configuration.set("yarn.resourcemanager.scheduler.address", "master:8030"); // scheduler
            configuration.set("mapreduce.jobhistory.address", "master:10020");          // JobHistory server
            configuration.set("hbase.master", "master:16000");
            configuration.set("hbase.rootdir", "hdfs://master:8020/hbase");
            configuration.set("hbase.zookeeper.quorum", "slave1,slave2,slave3");
            configuration.set("hbase.zookeeper.property.clientPort", "2181");
            // TODO: export the project as a jar file and point this at its actual location.
            configuration.set("mapreduce.job.jar", "C:\\Users\\Administrator\\Desktop\\hbase.jar");
        }
        return configuration;
    }
}
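After a successful run, the import can be checked with scan 'test1' in the HBase shell, or programmatically. Below is a minimal verification sketch (a hypothetical ScanTest class, reusing getConfiguration() and TABLE from the driver) that scans the table and prints every row with its columns:

package hdfs2hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanTest {
    public static void main(String[] args) throws Exception {
        Configuration conf = ImportToHBase.getConfiguration();
        // try-with-resources closes the connection, table, and scanner.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf(ImportToHBase.TABLE));
             ResultScanner scanner = table.getScanner(new Scan())) {
            for (Result result : scanner) {
                StringBuilder row = new StringBuilder(Bytes.toString(result.getRow()));
                for (Cell cell : result.rawCells()) {
                    row.append(' ')
                       .append(Bytes.toString(CellUtil.cloneQualifier(cell)))
                       .append('=')
                       .append(Bytes.toString(CellUtil.cloneValue(cell)));
                }
                System.out.println(row);
            }
        }
    }
}

If the import worked, each of the nine sample records should appear as one line, e.g. row key 1 followed by c1=30, c2="unemployed", and so on.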