1. Data on HDFS
1;30;"unemployed";"married";"primary";"no";1787;"no";"no";"cellular";19;"oct";79;1;-1;0;"unknown";"no"
2;33;"services";"married";"secondary";"no";4789;"yes";"yes";"cellular";11;"may";220;1;339;4;"failure";"no"
3;35;"management";"single";"tertiary";"no";1350;"yes";"no";"cellular";16;"apr";185;1;330;1;"failure";"no"
4;30;"management";"married";"tertiary";"no";1476;"yes";"yes";"unknown";3;"jun";199;4;-1;0;"unknown";"no"
5;59;"blue-collar";"married";"secondary";"no";0;"yes";"no";"unknown";5;"may";226;1;-1;0;"unknown";"no"
6;35;"management";"single";"tertiary";"no";747;"no";"no";"cellular";23;"feb";141;2;176;3;"failure";"no"
7;36;"self-employed";"married";"tertiary";"no";307;"yes";"no";"cellular";14;"may";341;1;330;2;"other";"no"
8;39;"technician";"married";"secondary";"no";147;"yes";"no";"cellular";6;"may";151;2;-1;0;"unknown";"no"
9;41;"entrepreneur";"married";"tertiary";"no";221;"yes";"no";"unknown";14;"may";57;2;-1;0;"unknown";"no"
2. Mapper class
package hdfs2hbase;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper: reads semicolon-delimited records from HDFS and emits one HBase
 * Put per record. The first field becomes the row key; the remaining fields
 * are stored as columns c1, c2, ... in the "cf" column family.
 */
public class ImportMapper extends
        Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

    private static final String SPLITTER = ";";
    private static final byte[] FAMILY = Bytes.toBytes("cf");

    private final ImmutableBytesWritable rowkey = new ImmutableBytesWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split with limit -1 so trailing empty fields are preserved.
        String[] words = value.toString().split(SPLITTER, -1);
        // Skip blank lines: an empty row key is not allowed by Put.
        if (words[0].isEmpty()) {
            return;
        }
        rowkey.set(Bytes.toBytes(words[0]));
        Put put = new Put(rowkey.get());
        for (int i = 1; i < words.length; i++) {
            put.addColumn(FAMILY, Bytes.toBytes("c" + i), Bytes.toBytes(words[i]));
        }
        context.write(rowkey, put);
    }
}
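To make the resulting row layout concrete, here is a minimal standalone sketch (a hypothetical SplitDemo class, not part of the job) that applies the same split to the first sample line and prints the row key and columns the mapper would emit:

package hdfs2hbase;

public class SplitDemo {
    public static void main(String[] args) {
        String line = "1;30;\"unemployed\";\"married\";\"primary\";\"no\";1787;"
                + "\"no\";\"no\";\"cellular\";19;\"oct\";79;1;-1;0;\"unknown\";\"no\"";
        // Same split as ImportMapper: limit -1 keeps trailing empty fields,
        // so every record yields the same set of cN columns.
        String[] words = line.split(";", -1);
        System.out.println("rowkey = " + words[0]);
        for (int i = 1; i < words.length; i++) {
            System.out.println("cf:c" + i + " = " + words[i]);
        }
    }
}

Running it prints rowkey = 1 followed by cf:c1 = 30 through cf:c17 = "no", which is exactly the cell layout each Put carries into the table.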
3. Driver class
package hdfs2hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Job driver. The target table must exist before the job runs; create it
 * in the HBase shell with:
 *
 *   create 'test1','cf'
 */
public class ImportToHBase extends Configured implements Tool {

    public static final String TABLE = "test1";

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage:\n ImportToHBase <input>");
            return -1;
        }
        Configuration conf = getConf();
        TableName tableName = TableName.valueOf(TABLE);
        Path inputDir = new Path(args[0]);
        String jobName = "Import to " + tableName.getNameAsString();

        Job job = Job.getInstance(conf, jobName);
        job.setJarByClass(ImportMapper.class);
        FileInputFormat.setInputPaths(job, inputDir);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(ImportMapper.class);
        // Configures TableOutputFormat for the target table; with zero
        // reduce tasks the mapper's Puts are written straight to HBase.
        TableMapReduceUtil.initTableReducerJob(
                tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
        return job.waitForCompletion(true) ? 0 : 1;
    }
    /**
     * Test entry point: overrides any command-line arguments with a
     * hard-coded HDFS input path.
     */
    public static void main(String[] args) throws Exception {
        args = new String[] {
                "/user/root/data.txt"
        };
        int exitCode = ToolRunner.run(getConfiguration(), new ImportToHBase(), args);
        System.exit(exitCode);
    }
    private static Configuration configuration;

    public static Configuration getConfiguration() {
        if (configuration == null) {
            configuration = new Configuration();
            // Allow job submission from a non-cluster (e.g. Windows) client.
            configuration.setBoolean("mapreduce.app-submission.cross-platform", true);
            configuration.set("fs.defaultFS", "hdfs://master:8020");                    // NameNode
            configuration.set("mapreduce.framework.name", "yarn");                      // run on YARN
            configuration.set("yarn.resourcemanager.address", "master:8032");           // ResourceManager
            configuration.set("yarn.resourcemanager.scheduler.address", "master:8030"); // scheduler
            configuration.set("mapreduce.jobhistory.address", "master:10020");          // JobHistory server
            configuration.set("hbase.master", "master:16000");
            configuration.set("hbase.rootdir", "hdfs://master:8020/hbase");
            configuration.set("hbase.zookeeper.quorum", "slave1,slave2,slave3");
            configuration.set("hbase.zookeeper.property.clientPort", "2181");
            // TODO: export the project as a jar file and point this at its actual location.
            configuration.set("mapreduce.job.jar", "C:\\Users\\Administrator\\Desktop\\hbase.jar");
        }
        return configuration;
    }
}
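After a successful run, the import can be checked with scan 'test1' in the HBase shell, or programmatically. Below is a minimal verification sketch (a hypothetical ScanTest class, reusing getConfiguration() and TABLE from the driver) that scans the table and prints every row with its columns:

package hdfs2hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanTest {
    public static void main(String[] args) throws Exception {
        Configuration conf = ImportToHBase.getConfiguration();
        // try-with-resources closes the connection, table, and scanner.
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf(ImportToHBase.TABLE));
             ResultScanner scanner = table.getScanner(new Scan())) {
            for (Result result : scanner) {
                StringBuilder row = new StringBuilder(Bytes.toString(result.getRow()));
                for (Cell cell : result.rawCells()) {
                    row.append(' ')
                       .append(Bytes.toString(CellUtil.cloneQualifier(cell)))
                       .append('=')
                       .append(Bytes.toString(CellUtil.cloneValue(cell)));
                }
                System.out.println(row);
            }
        }
    }
}

If the import worked, each of the nine sample records should appear as one line, e.g. row key 1 followed by c1=30, c2="unemployed", and so on.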