While trying to use data stored in HBase as the input source for a MapReduce (MR) job, I ran into a seemingly baffling problem!
Here is my source code:
package com.yc.hbase.mapreduce;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class HbaseAndMapReduce {
public static void main(String[] args) throws Exception {
//prepare the test data
//testData();
//show();
//the job to implement: find people who share the same interests
System.exit(run());
/*
 * TableMapper       // mapper base class for records read from HBase
 * TableReducer      // reducer base class for writing results back into HBase
 * TableOutputFormat // OutputFormat that writes job output into HBase
 * TableInputFormat  // InputFormat that reads job input from HBase
 */
}
public static void show(){
Admin admin = null;
Connection con = null;
try {
Configuration conf = HBaseConfiguration.create(); //create the configuration object
conf.set("hbase.zookeeper.quorum", "192.168.1.123");
con = ConnectionFactory.createConnection(conf); //open a connection
admin = con.getAdmin();
//scan the whole blog2 table and print every cell
TableName tn = TableName.valueOf("blog2"); //table name object
Scan scan = new Scan();
Table table03 = con.getTable(tn); //get the table
ResultScanner rs = table03.getScanner(scan);
for (Result result : rs) {
List<Cell> cs = result.listCells();
for (Cell cell : cs) {
String rowKey = Bytes.toString(CellUtil.cloneRow(cell)); //row key
long timestamp = cell.getTimestamp(); //timestamp
String family = Bytes.toString(CellUtil.cloneFamily(cell)); //column family
String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell)); //column qualifier
String value = Bytes.toString(CellUtil.cloneValue(cell)); //cell value
System.out.println(" ===> rowKey : " + rowKey + ", timestamp : " +
timestamp + ", family : " + family + ", qualifier : " + qualifier + ", value : " + value);
}
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static int run() throws Exception {
// conf = new Configuration();
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "192.168.1.123");
Job job = Job.getInstance(conf, "findFriend");
job.setJarByClass(HbaseAndMapReduce.class);
Scan scan = new Scan();
//only fetch the columns the job actually needs: article:tags and author:nickname
scan.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tags"));
scan.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"));
//the input source is HBase
// TableInputFormat.addColumns(scan, columns);
//ImmutableBytesWritable is the key type for records read from HBase
TableMapReduceUtil.initTableMapperJob("blog2", scan, FindFriendMapper.class,
ImmutableBytesWritable.class, Result.class, job);
FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.123:9000/out/" + new Date().getTime()));
return job.waitForCompletion(false) ? 0 : 1;
}
public static class FindFriendMapper extends TableMapper<ImmutableBytesWritable, Result>{
@Override
//key is the HBase row key
//value is the Result holding all cells of that row
protected void map(
ImmutableBytesWritable key,
Result value,
Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Result>.Context context)
throws IOException, InterruptedException {
System.out.println("key :: " + Bytes.toString(key.get()));
List<Cell> cs = value.listCells();
System.out.print("value :: ");
for (Cell cell : cs) {
String rowKey = Bytes.toString(CellUtil.cloneRow(cell));
long timestamp = cell.getTimestamp();
String family = Bytes.toString(CellUtil.cloneFamily(cell));
String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
String val = Bytes.toString(CellUtil.cloneValue(cell));
System.out.println("RowKey=" + rowKey + ", Timestamp=" + timestamp +
", Family=" + family + ", Qualifier=" + qualifier + ", Val=" + val);
}
//Mapper's default map() just writes (key, value) to the context, so this forwards each row unchanged
super.map(key, value, context);
}
}
public static void testData() {
try {
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "192.168.1.123");
Connection con = ConnectionFactory.createConnection(conf);
Admin admin = con.getAdmin();
TableName tn = TableName.valueOf("blog2");
if (admin.tableExists(tn)) {
admin.disableTable(tn);
admin.deleteTable(tn);
}
HTableDescriptor htd = new HTableDescriptor(tn);
HColumnDescriptor hcd01 = new HColumnDescriptor("article");
htd.addFamily(hcd01);
HColumnDescriptor hcd02 = new HColumnDescriptor("author");
htd.addFamily(hcd02);
admin.createTable(htd);
Table t = con.getTable(tn);
Put put = new Put(Bytes.toBytes("1"));
put.addColumn(Bytes.toBytes("article"), Bytes.toBytes("content"),
Bytes.toBytes("HBase is the Hadoop database. Use it when you need random, "
+ "realtime read/write access to your Big Data"));
put.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tags"),
Bytes.toBytes("HBase,NoSql,Hadoop"));
put.addColumn(Bytes.toBytes("article"), Bytes.toBytes("title"),
Bytes.toBytes("Head First Hbase"));
put.addColumn(Bytes.toBytes("author"), Bytes.toBytes("name"),
Bytes.toBytes("zhangsan"));
put.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"),
Bytes.toBytes("sansan"));
Put put02 = new Put(Bytes.toBytes("10"));
put02.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tags"),
Bytes.toBytes("Hadoop"));
put02.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"),
Bytes.toBytes("xiaoshi"));
Put put03 = new Put(Bytes.toBytes("100"));
put03.addColumn(Bytes.toBytes("article"), Bytes.toBytes("tags"),
Bytes.toBytes("hbase,nosql"));
put03.addColumn(Bytes.toBytes("author"), Bytes.toBytes("nickname"),
Bytes.toBytes("superman"));
List<Put> puts = Arrays.asList(put, put02, put03);
t.put(puts);
System.out.println("==========> 测试数据准备完成...");
if (admin != null){
admin.close();
}
if(con != null){
con.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
Here is the error output:
[17 20:57:06,187 DEBUG] org.apache.hadoop.security.UserGroupInformation - PrivilegedActionException as:wrm (auth:SIMPLE) cause:0: No such file or directory
Exception in thread "main" [17 20:57:06,214 DEBUG] org.apache.hadoop.ipc.Client - stopping client from cache: org.apache.hadoop.ipc.Client@a527eb
0: No such file or directory
at org.apache.hadoop.io.nativeio.NativeIO$POSIX.chmod(NativeIO.java:236)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:652)
at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:490)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:599)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:179)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:301)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:389)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1285)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1282)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Unknown Source)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1656)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1282)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1303)
at com.yc.hbase.mapreduce.HbaseAndMapReduce.run(HbaseAndMapReduce.java:106)
at com.yc.hbase.mapreduce.HbaseAndMapReduce.main(HbaseAndMapReduce.java:39)
A file that cannot be found, a user that cannot be found... at first sight I was completely lost. Of course, it is easy to imagine that under the hood TableMapReduceUtil.initTableMapperJob first queries the data out of HBase into some file, and that this file then serves as the source for the MapReduce job. Which made it all the stranger: that file would be generated by the HBase/Hadoop internals, not by my own code, so why on earth could it not be found?!
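(For the record: after later reading the TableMapReduceUtil source, my guess above was not quite right. The mapper input comes straight from the table's regions via TableInputFormat; no intermediate data file is written. Roughly, the call configures the job along these lines — a simplified sketch based on my reading of the source, details differ between HBase versions:)
// import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
// simplified sketch of what TableMapReduceUtil.initTableMapperJob("blog2", scan,
// FindFriendMapper.class, ImmutableBytesWritable.class, Result.class, job) sets up:
public static void initTableMapperJobSketch(Job job) throws IOException {
    job.setInputFormatClass(TableInputFormat.class);        // splits come from HBase regions, not files
    job.setMapperClass(FindFriendMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Result.class);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, "blog2");
    // the Scan object itself is serialized into the configuration under TableInputFormat.SCAN
}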
The first thing I did was open NativeIO at the spot named in the stack trace, and I noticed there is a path there. So I copied this class into my own project and printed that path to the console:
public static void chmod(String path, int mode) throws IOException {
if (!Shell.WINDOWS) {
chmodImpl(path, mode);
} else {
try {
//added by me: print the path that chmod is about to touch
System.out.printf(path);
chmodImpl(path, mode);
} catch (NativeIOException nioe) {
if (nioe.getErrorCode() == 3) {
throw new NativeIOException("No such file or directory",
Errno.ENOENT);
} else {
LOG.warn(String.format("NativeIO.chmod error (%d): %s",
nioe.getErrorCode(), nioe.getMessage()));
throw new NativeIOException("Unknown error", Errno.UNKNOWN);
}
}
}
}
It turned out the path of that file looks like this:
C:\tmp\hadoop-wrm\mapred\staging\wrm61821100\.staging\job_local61821100_0001
The strange thing is that my C: drive did not even contain a tmp directory, which proved the file had never been created at all. At this point I fell into a mental trap: because the earlier log seemed to say that my user wrm could not be found (PrivilegedActionException as:wrm), I began to suspect that the wrm user had no permission to access Hadoop. I went into HDFS on the command line and found that the hbase directory really did lack permissions, changed them, and re-ran the test full of hope. Still no luck!
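Looking back, a few lines of plain JDK code would have tested the "missing file" theory in seconds by simply trying to create that staging path myself (the path below is the one from my log; adjust the user name for your own machine):
import java.io.File;

public class StagingDirCheck {
    public static void main(String[] args) {
        // the local staging root taken from my error log (adjust for your user)
        File stagingRoot = new File("C:/tmp/hadoop-wrm/mapred/staging");
        // mkdirs() returns false if any parent directory cannot be created,
        // which is exactly what happens when C:\ is not writable for the current user
        System.out.println("created: " + stagingRoot.mkdirs());
        System.out.println("writable: " + stagingRoot.canWrite());
    }
}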
This problem tormented me for two days, and in the end it really was a permission problem, only not a permission problem inside Hadoop but one in Windows! In a flash of insight I thought of checking the permissions on my C: drive and found that the Users group had no write permission there! Ahem... After granting Users write permission, the problem was solved T.T. I could have slapped myself. Because I was stuck in the wrong line of thinking, there was nothing about this online, and the error message was so terse, I wasted two whole days. A lesson worth remembering.
The fix is as follows:
Go to C:\ -> right-click -> Properties -> Security -> Edit permissions -> Users -> check Write and Modify -> OK
It really is that simple. I hope this helps other beginners who hit the same problem!
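If you would rather not touch the permissions of the drive root, another option that should work (I have not re-verified it, so treat it as a sketch) is to point Hadoop's local temp directory at a folder your Windows user can definitely write to. The failing path C:\tmp\hadoop-wrm comes from hadoop.tmp.dir, which defaults to /tmp/hadoop-${user.name}, and the local job staging directory defaults to ${hadoop.tmp.dir}/mapred/staging. In run(), the first few lines would become something like:
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "192.168.1.123");
// assumption: D:/hadoop-tmp is a directory the current Windows user can write to
conf.set("hadoop.tmp.dir", "D:/hadoop-tmp");
// mapreduce.jobtracker.staging.root.dir defaults to ${hadoop.tmp.dir}/mapred/staging,
// so the .staging directory that chmod() failed on would now be created under D:/hadoop-tmp
Job job = Job.getInstance(conf, "findFriend");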