mysql 批量导入hbase_hbase批量导入2

最新推荐文章于 2023-03-28 16:10:43 发布

胡格

最新推荐文章于 2023-03-28 16:10:43 发布

阅读量81

点赞数

文章标签： mysql 批量导入hbase

本文链接：https://blog.csdn.net/weixin_32364911/article/details/114334388

版权

import java.io.IOException;

import java.text.SimpleDateFormat;

import java.util.Date;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;

import org.apache.hadoop.hbase.mapreduce.TableReducer;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Counter;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class UptoHBase {

/**

public static void main(String[] args) throws java.io.IOException, InterruptedException,ClassNotFoundException{

// TODO Auto-generated method stub

final Configuration configuration = new Configuration();

// 设置zookeeper

//configuration.set("hbase.zookeeper.quorum", "hadoop1");

configuration.set("hbase.zookeeper.quorum", "localhost");

// 设置hbase表名称

// configuration.set(TableOutputFormat.OUTPUT_TABLE, "wlan_log");

//configuration.set(TableOutputFormat.OUTPUT_TABLE, "testhbase");

configuration.set(TableOutputFormat.OUTPUT_TABLE, "Bearing1_1_acc");

// 将该值改大，防止hbase超时退出

configuration.set("dfs.socket.timeout", "180000");

final Job job = new Job(configuration, "HBaseBatchImport");

job.setMapperClass(BatchImportMapper.class);

job.setReducerClass(BatchImportReducer.class);

// 设置map的输出，不设置reduce的输出类型

job.setMapOutputKeyClass(LongWritable.class);

job.setMapOutputValueClass(Text.class);

job.setInputFormatClass(TextInputFormat.class);

// 不再设置输出路径，而是设置输出格式类型

job.setOutputFormatClass(TableOutputFormat.class);

// FileInputFormat.setInputPaths(job, "hdfs://hadoop1:9000/input");

// FileInputFormat.setInputPaths(job, "hdfs://localhost:9000/user/hadoop/testhbase");

FileInputFormat.setInputPaths(job, "hdfs://localhost:9000/user/hadoop/Bearing1_1_acc");

job.waitForCompletion(true);

}

static class BatchImportMapper extends Mapper {

Text v2 = new Text();

protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {

//final String[] splited = value.toString().split("\t");

final String[] splited = value.toString().split(",");

//System.out.println("%%%%%%%%%%%%"+splited[3]);

try {

//String rowKey = splited[1] + ":" + dateFormat;

//String rowKey = splited[0] ;

//第四列的前面一位补充成为 0 凑齐6位数

String str0="00";

splited[0]=str0.substring(0,2-splited[0].length())+splited[0];

//System.out.println("%%%%%%%%%%%%"+splited[0]);

String str1="00";

splited[1]=str1.substring(0,2-splited[1].length())+splited[1];

//System.out.println(".............."+splited[1]);

String str2="00";

splited[2]=str2.substring(0,2-splited[2].length())+splited[2];

//System.out.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"+splited[2]);

String str3="00000000000";

splited[3]=str3.substring(0,11-splited[3].length())+splited[3];

//System.out.println("%%%%%%%%%%%%"+splited[3]);

String rowKey = splited[0]+splited[1]+splited[2]+splited[3];

v2.set(rowKey + "," + value.toString());

//System.out.println("#####################" + key);

System.out.println("^^^^^^^^^^^^^^^^^^^" + rowKey);

//System.out.println(">>>>>>>>>>>>>>" + value.toString());

context.write(key, v2);

} catch (NumberFormatException e) {

final Counter counter = context.getCounter("BatchImport","ErrorFormat");

counter.increment(1L);

System.out.println("出错了" + splited[0] + " " + e.getMessage());

}

static class BatchImportReducer extends TableReducer {

protected void reduce(LongWritable key,java.lang.Iterable values, Context context) throws java.io.IOException, InterruptedException {

for (Text text : values) {

// final String[] splited = text.toString().split("\t");

final String[] splited = text.toString().split(",");

final Put put = new Put(Bytes.toBytes(splited[0]));

// put.add(Bytes.toBytes("cf"), Bytes.toBytes("date"), Bytes.toBytes(splited[1]));

//put.add(Bytes.toBytes("data"), Bytes.toBytes("h"),Bytes.toBytes(splited[2]));

//put.add(Bytes.toBytes("data"), Bytes.toBytes("l"),Bytes.toBytes(splited[3]));

put.add(Bytes.toBytes("data"), Bytes.toBytes("h"),Bytes.toBytes(splited[5]));

put.add(Bytes.toBytes("data"), Bytes.toBytes("l"),Bytes.toBytes(splited[6]));

// 省略其他字段，调用put.add(....)即可

context.write(NullWritable.get(), put);

}

下面的截图反映的是一次性导入 2000+ 个 csv 文件后，扫描表时出现的错误

胡格

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫