Code:
package hdfsandhbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class Hdfs2Hbase {

    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split each comma-separated line and emit (token, "1") for every field.
            String line = value.toString();
            String[] fields = line.split(",");
            for (String s : fields) {
                context.write(new Text(s), new Text("1"));
            }
        }
    }

    public static class MyReduce extends TableReducer<Text, Text, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Sum all the "1"s emitted for this token.
            int counter = 0;
            for (Text t : values) {
                counter += Integer.parseInt(t.toString());
            }
            // Write the total to HBase: row key = token, column data:count = total.
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(Bytes.toBytes("data"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(counter)));
            // Use Bytes.toBytes(key.toString()) rather than key.getBytes():
            // Text.getBytes() returns the internal buffer, which may contain stale
            // bytes beyond getLength().
            context.write(new ImmutableBytesWritable(Bytes.toBytes(key.toString())), put);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop01:9000");
        conf.set("hbase.zookeeper.quorum", "hadoop01");
        TableName tn = TableName.valueOf("ns:test");

        // Open the HBase connection and admin client.
        Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();

        // Create the namespace (only needed once; the table below lives in "ns").
        //NamespaceDescriptor nsd = NamespaceDescriptor.create("ns").build();
        //admin.createNamespace(nsd);

        // Define the table with a single column family "data".
        HTableDescriptor htd = new HTableDescriptor(tn);
        HColumnDescriptor hcd = new HColumnDescriptor("data");
        htd.addFamily(hcd);

        // If the table already exists, disable and delete it first.
        if (admin.tableExists(tn)) {
            if (admin.isTableEnabled(tn)) {
                admin.disableTable(tn);
            }
            admin.deleteTable(tn);
        }
        admin.createTable(htd);
        admin.close();

        // Configure the MapReduce job.
        Job job = Job.getInstance(conf, "hdfs2hbase");
        job.setJarByClass(Hdfs2Hbase.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Input path on HDFS.
        FileInputFormat.addInputPath(job, new Path("/data/input/csvdata.txt"));

        // Initialize the reducer side with TableMapReduceUtil.
        TableMapReduceUtil.initTableReducerJob(
                "ns:test",
                MyReduce.class,
                job);
        job.waitForCompletion(true);
        conn.close();
        System.out.println("finished");
    }
}
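For context, the job is a comma-separated word count written into HBase: every token in the input becomes a row key in ns:test, with its total occurrence count in data:count. A hypothetical example (the input contents below are illustrative, not from the original run):

Input /data/input/csvdata.txt:
    apple,banana,apple
    banana,cherry

Resulting cells in ns:test:
    row=apple     column=data:count    value=2
    row=banana    column=data:count    value=2
    row=cherry    column=data:count    value=1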
Error:
Exception in thread "main" java.lang.IllegalArgumentException: Pathname /D:/HBase/hbase-1.2.1-bin/lib/zookeeper-3.4.6.jar from hdfs://hadoop01:9000/D:/HBase/hbase-1.2.1-bin/lib/zookeeper-3.4.6.jar is not a valid DFS filename.
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:187)
at org.apache.hadoop.hdfs.DistributedFileSystem.access$000(DistributedFileSystem.java:101)
at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1068)
at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1064)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1064)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.getFileStatus(ClientDistributedCacheManager.java:288)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.getFileStatus(ClientDistributedCacheManager.java:224)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.determineTimestamps(ClientDistributedCacheManager.java:93)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(ClientDistributedCacheManager.java:57)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:265)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:301)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:389)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1285)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1282)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1282)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1303)
at hdfsandhbase.Hdfs2Hbase.main(Hdfs2Hbase.java:89)
Solution:
Change
    TableMapReduceUtil.initTableReducerJob("ns:test", MyReduce.class, job);
to
    TableMapReduceUtil.initTableReducerJob("ns:test", MyReduce.class, job, null, null, null, null, false);
and the job runs successfully.
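For reference, here is the corrected call with each argument labeled, following the eight-argument overload of TableMapReduceUtil.initTableReducerJob in the HBase 1.x API:

    TableMapReduceUtil.initTableReducerJob(
            "ns:test",        // output table
            MyReduce.class,   // reducer class
            job,
            null,             // partitioner: use the default
            null,             // quorumAddress: write to the cluster in the job's configuration
            null,             // serverClass: no override
            null,             // serverImpl: no override
            false);           // addDependencyJars: do not ship local jars with the job

Note that with addDependencyJars set to false, the HBase client jars must already be available on the cluster's task classpath (for example by adding the output of the hbase mapredcp command to HADOOP_CLASSPATH).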
Reason:
The three-argument overload of initTableReducerJob defaults addDependencyJars to true, so at submission time the client tries to add its local HBase and ZooKeeper jars to the distributed cache. When the job is submitted from a Windows IDE, those jars live on the local filesystem (e.g. D:/HBase/hbase-1.2.1-bin/lib/zookeeper-3.4.6.jar), but the submitter resolves them against fs.defaultFS, producing the invalid HDFS pathname hdfs://hadoop01:9000/D:/... seen in the stack trace. Passing false for the final addDependencyJars parameter skips that step; the required classes must then already be present on the cluster.
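One way to see what would have been shipped is to print the tmpjars property, which Hadoop's dependency-jar mechanism populates on the job configuration (a debugging sketch; this assumes the standard distributed-cache behavior):

    // Before job.waitForCompletion(true): with addDependencyJars enabled, this
    // prints local jar paths such as D:/HBase/hbase-1.2.1-bin/lib/... on Windows,
    // which the submitter then tries to resolve against hdfs://hadoop01:9000.
    System.out.println(job.getConfiguration().get("tmpjars"));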