HBase Framework Setup and Usage - Part 1: HBase Installation and Reading/Writing Between HBase and HDFS
Setup
Required files:
https://github.com/1367379258/BigDataED/tree/master/hbase/%E8%B5%84%E6%96%99
Pseudo-distributed setup
One node is enough. We pick the node that does not run ZooKeeper: node06.
1. Upload hbase-0.98.23-bin and protobuf to node06.
2. Rename the extracted directory to hbase: mv hbase-<version> hbase
3. vi /etc/profile
   export HBASE_HOME=/root/hbase, append :$HBASE_HOME/bin to PATH, then reload with . /etc/profile
4. cd hbase/conf, then vi hbase-env.sh and set export JAVA_HOME=/usr/java/jdk1.7.......
   vi hbase-site.xml and add the following:
<property>
  <name>hbase.rootdir</name>
  <value>file:///home/testuser/hbase</value>
</property>
<property>
  <name>hbase.zookeeper.property.dataDir</name>
  <value>/home/testuser/zookeeper</value>
</property>
5. Run start-hbase.sh, check the processes with jps, then open node06:60010 in a browser.
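As an optional smoke test, create a table, write one cell, and scan it back from the standard HBase shell (the test table name is arbitrary):
hbase shell
create 'test', 'cf'
put 'test', 'row1', 'cf:a', 'value1'
scan 'test'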
Fully distributed setup:
1. Preparation
   1. Network connectivity between all nodes
   2. hosts file: hostname resolution on every node
   3. SSH: node09 is the standby master, so it needs passwordless SSH to the others
      ssh-keygen
      ssh-copy-id -i .ssh/id_dsa.pub node1   (repeat for each node)
   4. Time: the clocks on all nodes must agree
      date -s '2018-12-24 16:23:11'
      Or sync against a time server:
      yum install ntpdate
      ntpdate ntp1.aliyun.com
   5. JDK version: the same JDK on every node
2. Extract and configure on node06
   1. hbase-env.sh
      set JAVA_HOME
      export HBASE_MANAGES_ZK=false   // do not start HBase's own ZooKeeper; use the external ensemble
2. hbase-site.xml
<property>
  <name>hbase.rootdir</name>
  <value>hdfs://mycluster/hbase</value>
</property>
<property>
  <name>hbase.cluster.distributed</name>
  <value>true</value>
</property>
<property>
  <name>hbase.zookeeper.quorum</name>
  <value>node07,node08,node09</value>
</property>
3. regionservers:
node07
node08
node09
4. backup-masters:
   node09   (the standby master)
5. Copy hdfs-site.xml into the conf directory so HBase can resolve the mycluster HA nameservice.
   (Delete the docs directory under hbase first; it is large and makes scp slow.)
scp -r hbase node09:/root/
scp -r hbase node08:/root/
scp -r hbase node07:/root/
Configure the HBase environment variable on the three other nodes as well:
export HBASE_HOME=/root/hbase
Path ... :$HBASE_HOME/bin
3. Run start-hbase.sh on node06.
   To start an individual daemon, e.g. the standby master on node09: hbase-daemon.sh start master
   Web UI: node06:60010
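To verify the process layout implied by the config above, run jps on each node: node06 and node09 should each show an HMaster process, and node07, node08, and node09 (listed in regionservers) should each show an HRegionServer process.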
2. Reading and writing HDFS
Read data from HDFS, analyze it, and write the result into HBase.
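The job below writes into an HBase table named wc with column family cf, so the table must exist before the job runs. A minimal sketch of creating it with the 0.98-era Java client (the table and family names come from the code below; the class name CreateWCTable is just for this sketch, and create 'wc','cf' in the HBase shell does the same):
package com.bjsxt.hbase_2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
public class CreateWCTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node07,node08,node09"); // same quorum the job uses
        HBaseAdmin admin = new HBaseAdmin(conf); // 0.98-era admin API
        if (!admin.tableExists("wc")) {
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("wc"));
            desc.addFamily(new HColumnDescriptor("cf")); // family used by WCReducer
            admin.createTable(desc);
        }
        admin.close();
    }
}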
package com.bjsxt.hbase_2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
/**
 * Reads data from HDFS, processes it, and writes the result into HBase.
 * @author asus
 */
public class WCRunner implements Tool{
private static final Logger logger = Logger
.getLogger(WCRunner.class);
private Configuration conf = null;
public static void main(String[] args) {
try {
ToolRunner.run(new Configuration(), new WCRunner(), args);
} catch (Exception e) {
            logger.error("Job execution failed", e);
e.printStackTrace();
}
}
@Override
public void setConf(Configuration conf) {
conf.set("fs.defaultFS", "hdfs://node06:8020");
conf.set("mapreduce.framework.name", "yarn");
// conf.set("mapreduce.framework.name", "local");
conf.set("yarn.resourcemanager.hostname", "node08");
conf.set("ha.zookeeper.quorum", "node07,node08,node09");
conf.set("mapreduce.app-submission.cross-platform", "true");
conf.set("hbase.zookeeper.quorum", "node07,node08,node09");
this.conf = conf;
}
@Override
public Configuration getConf() {
return this.conf;
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = Job.getInstance(conf);
job.setJarByClass(WCRunner.class);
        job.setJar("date/wc_tool.jar"); // jar of the whole project, exported ahead of time
job.setMapperClass(WCMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
        TableMapReduceUtil.initTableReducerJob("wc", WCReducer.class, job,
                null, null, null, null, false); // reducer writes into the 'wc' table
FileInputFormat.addInputPath(job, new Path("/usr/wc.txt"));
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Put.class);
return job.waitForCompletion(true) ? 0 : -1;
}
}
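Before submitting WCRunner, the input file must already be in HDFS at the path hard-coded in run(), and the project jar must be exported to the path passed to setJar. A sketch with the standard HDFS CLI (wc.txt stands for whatever whitespace-separated word list you use):
hdfs dfs -mkdir -p /usr
hdfs dfs -put wc.txt /usr/wc.txt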
package com.bjsxt.hbase_2;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
        String[] split = value.toString().split("\\s+"); // split each line on whitespace
for (String str: split) {
context.write(new Text(str), new IntWritable(1));
}
}
}
package com.bjsxt.hbase_2;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WCReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable>{
@Override
protected void reduce(Text key, Iterable<IntWritable> value,
Reducer<Text, IntWritable, ImmutableBytesWritable, Mutation>.Context context)
throws IOException, InterruptedException {
        if (key != null && !key.toString().isEmpty()) { // skip empty keys
            int sum = 0;
            for (IntWritable intWritable : value) {
                sum += intWritable.get();
            }
            // row key = the word; column cf:name stores the count as a string
            Put put = new Put(key.toString().getBytes());
            put.add("cf".getBytes(), "name".getBytes(), String.valueOf(sum).getBytes());
            context.write(null, put); // TableOutputFormat ignores the key and applies the Put
}
}
}
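After this first job finishes, the per-word counts can be spot-checked from the HBase shell, for example:
hbase shell
scan 'wc'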
Read data from HBase and write it to HDFS.
package com.bjsxt.hbase_3;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
/**
 * Reads data from HBase, processes it, and writes the result to HDFS.
 * @author asus
 */
public class WCRunner2 implements Tool{
private static final Logger logger = Logger
.getLogger(WCRunner2.class);
private Configuration conf = null;
public static void main(String[] args) {
try {
ToolRunner.run(new Configuration(), new WCRunner2(), args);
} catch (Exception e) {
            logger.error("Job execution failed", e);
e.printStackTrace();
}
}
@Override
public void setConf(Configuration conf) {
conf.set("fs.defaultFS", "hdfs://node06:8020");
conf.set("mapreduce.framework.name", "yarn");
// conf.set("mapreduce.framework.name", "local");
conf.set("yarn.resourcemanager.hostname", "node08");
conf.set("ha.zookeeper.quorum", "node07,node08,node09");
conf.set("mapreduce.app-submission.cross-platform", "true");
conf.set("hbase.zookeeper.quorum", "node07,node08,node09");
this.conf = conf;
}
@Override
public Configuration getConf() {
return this.conf;
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = Job.getInstance(conf);
job.setJarByClass(WCRunner2.class);
        job.setJar("date/wc_tool2.jar"); // jar of the whole project, exported ahead of time
        Scan scans = new Scan();
        scans.setCaching(500);       // rows fetched per RPC, keeps the scan efficient
        scans.setCacheBlocks(false); // recommended off for MapReduce scans
TableMapReduceUtil.initTableMapperJob("wc", scans, WCMapper2.class, Text.class, IntWritable.class, job);
job.setReducerClass(WCReducer2.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileOutputFormat.setOutputPath(job, new Path("/usr/wc2"));
return job.waitForCompletion(true) ? 0 : -1;
}
}
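One caveat when re-running WCRunner2: FileOutputFormat fails if the output directory /usr/wc2 already exists, so remove it first with the standard HDFS CLI:
hdfs dfs -rm -r /usr/wc2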
package com.bjsxt.hbase_3;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WCReducer2 extends Reducer<Text, IntWritable, Text, IntWritable>{
@Override
protected void reduce(Text key, Iterable<IntWritable> value,
Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable intWritable: value) {
sum += intWritable.get();
}
context.write(key, new IntWritable(sum));
}
}
package com.bjsxt.hbase_3;
import java.io.IOException;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WCMapper2 extends TableMapper<Text, IntWritable>{
private Text text = new Text();
@Override
protected void map(ImmutableBytesWritable key, Result value,
Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
        // cf:name holds the count written by the first job
        String str = Bytes.toString(CellUtil.cloneValue(value.getColumnLatestCell("cf".getBytes(), "name".getBytes())));
        text.set(Bytes.toString(key.get())); // the row key is the word itself
        context.write(text, new IntWritable(Integer.valueOf(str)));
}
}
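When the job completes, the word counts are plain text part files under /usr/wc2; assuming the default single reducer, they can be read with:
hdfs dfs -cat /usr/wc2/part-r-00000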