Loading data from HDFS into HBase with a MapReduce job

package hbase;  


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;  
import org.apache.hadoop.hbase.HBaseConfiguration;  
import org.apache.hadoop.hbase.HColumnDescriptor;  
import org.apache.hadoop.hbase.HTableDescriptor;  
import org.apache.hadoop.hbase.TableName;  
import org.apache.hadoop.hbase.client.HBaseAdmin;  
import org.apache.hadoop.hbase.client.Mutation;  
import org.apache.hadoop.hbase.client.Put;  
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;  
import org.apache.hadoop.hbase.mapreduce.TableReducer;  
import org.apache.hadoop.io.LongWritable;  
import org.apache.hadoop.io.NullWritable;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Job;  
import org.apache.hadoop.mapreduce.Mapper;  
import org.apache.hadoop.mapreduce.Reducer;  
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  


import java.io.IOException;  
import java.text.SimpleDateFormat;  
import java.util.Date;  


/** 
 * Created by Administrator on 2017/3/7. 
 */  
public class LoadData extends Configured {  
    public static class LoadDataMapper extends Mapper<LongWritable,Text,LongWritable,Text>{  
        private Text out = new Text();  
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMddHHmmss");  

        @Override  
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {  
            //1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC  120.196.100.82  i02.c.aliimg.com        24  27  2481    24681   200
            String line = value.toString();  
            String [] splited = line.split("\t");  
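            // The reducer below expects 11 tab-separated fields per input line:
            // reportTime, msisdn, apmac, acmac, host, siteType, upPackNum,
            // downPackNum, upPayLoad, downPayLoad, httpStatus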
            String  formatedDate = simpleDateFormat.format(new Date(Long.parseLong(splited[0].trim())));  
            String rowKeyString = splited[1]+":"+formatedDate;  
            out.set(rowKeyString+"\t"+line);  
            //13726230503:201706291728  1363157985066   13726230503 00-FD-07-A4-72-B8:CMCC  120.196.100.82  i02.c.aliimg.com        24  27  2481    24681   200
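            // The byte offset stays as the shuffle key; the HBase rowkey travels as the
            // first field of the value and is extracted again in the reducer.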
            context.write(key,out);  
        }  
    }  
    public static class LoadDataReducer extends TableReducer<LongWritable,Text,NullWritable>{  
        public static final String COLUMN_FAMILY = "cf";  
        @Override  
        protected void reduce(LongWritable key, Iterable<Text> values, Reducer<LongWritable, Text, NullWritable, Mutation>.Context context) throws IOException, InterruptedException {  

            for (Text tx : values) {  
                String[] splited = tx.toString().split("\t");  
                String rowkey = splited[0];  


                Put put = new Put(rowkey.getBytes());  
//                put.add(COLUMN_FAMILY.getBytes(), "raw".getBytes(), tx  
//                        .toString().getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "reportTime".getBytes(),  
                        splited[1].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "msisdn".getBytes(),  
                        splited[2].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "apmac".getBytes(),  
                        splited[3].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "acmac".getBytes(),  
                        splited[4].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "host".getBytes(),  
                        splited[5].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "siteType".getBytes(),  
                        splited[6].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "upPackNum".getBytes(),  
                        splited[7].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "downPackNum".getBytes(),  
                        splited[8].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "upPayLoad".getBytes(),  
                        splited[9].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "downPayLoad".getBytes(),  
                        splited[10].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "httpStatus".getBytes(),  
                        splited[11].getBytes());  
                context.write(NullWritable.get(), put);  
            }  
        }  
    }  
    public static void createHBaseTable(String tableName) throws IOException {  

        HTableDescriptor htd = new HTableDescriptor(  
                TableName.valueOf(tableName));  
        HColumnDescriptor col = new HColumnDescriptor("cf");  
        htd.addFamily(col);  
        Configuration conf = HBaseConfiguration.create();  
        conf.set("hbase.zookeeper.quorum", "hive01");  
        HBaseAdmin admin = new HBaseAdmin(conf);  
        if (admin.tableExists(tableName)) {  
            System.out.println("table exists, trying to recreate table......");  
            admin.disableTable(tableName);  
            admin.deleteTable(tableName);  
        }  
        System.out.println("create new table:" + tableName);  
        admin.createTable(htd);  
        admin.close();  

    }  
    public static void main(String[] args) throws Exception {  
       args = new String[] { "hdfs://hive01:8020/input/hbase" };   // hard-coded HDFS input path for testing
        Configuration conf = HBaseConfiguration.create();  
        // conf.set("hbase.rootdir", "hdfs://bigdata:8020/hbase");  
        conf.set("hbase.zookeeper.quorum", "hive01"); 

        conf.set(TableOutputFormat.OUTPUT_TABLE, "phone_log"); 

        createHBaseTable("phone_log"); 

        Job job = Job.getInstance(conf, "LoadData");  
        job.setJarByClass(LoadData.class);  
        job.setNumReduceTasks(1);  


        // 3.2 map class  
        job.setMapperClass(LoadDataMapper.class);  
        job.setMapOutputKeyClass(LongWritable.class);  
        job.setMapOutputValueClass(Text.class);  


        // 3.3 reduce class  
        job.setReducerClass(LoadDataReducer.class);  
      //  job.setOutputKeyClass(NullWritable.class);     // no need to set  
     //   job.setOutputValueClass(Mutation.class);       // no need to set  



        Path inPath = new Path(args[0]);  
        FileInputFormat.addInputPath(job, inPath);  

        job.setOutputFormatClass(TableOutputFormat.class);  
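        // TableOutputFormat writes each Mutation emitted by the reducer into the
        // table named by TableOutputFormat.OUTPUT_TABLE ("phone_log" above).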


        System.exit(job.waitForCompletion(true) ? 0 : 1);  

    }  
}  
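
A note on job wiring: instead of setting TableOutputFormat.OUTPUT_TABLE and the output format class by hand, the HBase helper TableMapReduceUtil.initTableReducerJob can do the same configuration and also ship the HBase dependency jars with the job, which matters when submitting to a real cluster. A minimal sketch of the alternative wiring for main(), assuming the same phone_log table and the classes above, plus an import of org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil:

        Job job = Job.getInstance(conf, "LoadData");
        job.setJarByClass(LoadData.class);
        job.setMapperClass(LoadDataMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Sets LoadDataReducer as the reducer, TableOutputFormat as the output format,
        // points it at "phone_log", and adds the HBase jars to the job's classpath.
        TableMapReduceUtil.initTableReducerJob("phone_log", LoadDataReducer.class, job);

        System.exit(job.waitForCompletion(true) ? 0 : 1);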

Result: view the imported rows in HBase, e.g. with scan 'phone_log' in the hbase shell.

[screenshot: query result in HBase]
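
The load can also be checked with a short client-side scan. A minimal sketch using the same generation of the client API as the code above (table name, column family and qualifier as defined in LoadData; requires imports from org.apache.hadoop.hbase.client and org.apache.hadoop.hbase.util.Bytes):

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hive01");
        HTable table = new HTable(conf, "phone_log");   // old-style client, matching LoadData
        ResultScanner scanner = table.getScanner(new Scan());
        for (Result r : scanner) {
            String rowkey = Bytes.toString(r.getRow());
            String host = Bytes.toString(r.getValue("cf".getBytes(), "host".getBytes()));
            System.out.println(rowkey + "\t" + host);
        }
        scanner.close();
        table.close();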
