HBase Framework Setup and Usage - Part 1: HBase Installation and Reading/Writing Between HBase and HDFS
Setup
Required files:
https://github.com/1367379258/BigDataED/tree/master/hbase/%E8%B5%84%E6%96%99
Pseudo-distributed setup
One node is enough. We pick the node that does not run ZooKeeper: node06.
1. Upload hbase-0.98.23-bin and protobuf to node06.
2. Rename the extracted directory to hbase: mv hbase-<version> hbase
3. vi /etc/profile
   export HBASE_HOME=/root/hbase, append :$HBASE_HOME/bin to PATH, then reload with . /etc/profile
4. cd hbase/conf, then vi hbase-env.sh and set export JAVA_HOME=/usr/java/jdk1.7.......
   vi hbase-site.xml and add the following:
<property>
  <name>hbase.rootdir</name>
  <value>file:///home/testuser/hbase</value>
</property>
<property>
  <name>hbase.zookeeper.property.dataDir</name>
  <value>/home/testuser/zookeeper</value>
</property>
5. Run start-hbase.sh, check the processes with jps, then open node06:60010 in a browser.
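As an optional smoke test, create a table, write one cell, and scan it back from the standard HBase shell (the test table name is arbitrary):
hbase shell
create 'test', 'cf'
put 'test', 'row1', 'cf:a', 'value1'
scan 'test'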
Fully distributed setup:
1. Preparation
   1. Network connectivity between all nodes
   2. hosts file: hostname resolution on every node
   3. SSH: node09 is the standby master, so it needs passwordless SSH to the others
      ssh-keygen
      ssh-copy-id -i .ssh/id_dsa.pub node1   (repeat for each node)
   4. Time: the clocks on all nodes must agree
      date -s '2018-12-24 16:23:11'
      Or sync against a time server:
      yum install ntpdate
      ntpdate ntp1.aliyun.com
   5. JDK version: the same JDK on every node
2. Extract and configure on node06
   1. hbase-env.sh
      set JAVA_HOME
      export HBASE_MANAGES_ZK=false   // do not start HBase's own ZooKeeper; use the external ensemble
2. hbase-site.xml
<property>
  <name>hbase.rootdir</name>
  <value>hdfs://mycluster/hbase</value>
</property>
<property>
  <name>hbase.cluster.distributed</name>
  <value>true</value>
</property>
<property>
  <name>hbase.zookeeper.quorum</name>
  <value>node07,node08,node09</value>
</property>
3. regionservers:
node07
node08
node09
4. backup-masters:
   node09   (the standby master)
5. Copy hdfs-site.xml into the conf directory so HBase can resolve the mycluster HA nameservice.
   (Delete the docs directory under hbase first; it is large and makes scp slow.)
scp -r hbase node09:/root/
scp -r hbase node08:/root/
scp -r hbase node07:/root/
Configure the HBase environment variable on the three other nodes as well:
export HBASE_HOME=/root/hbase
Path ... :$HBASE_HOME/bin
3. Run start-hbase.sh on node06.
   To start an individual daemon, e.g. the standby master on node09: hbase-daemon.sh start master
   Web UI: node06:60010
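To verify the process layout implied by the config above, run jps on each node: node06 and node09 should each show an HMaster process, and node07, node08, and node09 (listed in regionservers) should each show an HRegionServer process.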
2. Reading and writing HDFS
Read data from HDFS, analyze it, and write the result into HBase.
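The job below writes into an HBase table named wc with column family cf, so the table must exist before the job runs. A minimal sketch of creating it with the 0.98-era Java client (the table and family names come from the code below; the class name CreateWCTable is just for this sketch, and create 'wc','cf' in the HBase shell does the same):
package com.bjsxt.hbase_2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
public class CreateWCTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "node07,node08,node09"); // same quorum the job uses
        HBaseAdmin admin = new HBaseAdmin(conf); // 0.98-era admin API
        if (!admin.tableExists("wc")) {
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("wc"));
            desc.addFamily(new HColumnDescriptor("cf")); // family used by WCReducer
            admin.createTable(desc);
        }
        admin.close();
    }
}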
package com.bjsxt.hbase_2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
/**
 * Reads data from HDFS, processes it, and writes the result into HBase.
 * @author asus
 */
public class WCRunner implements Tool{
private static final Logger logger = Logger
.getLogger(WCRunner.class);
private Configuration conf = null;
public static void main(String[] args) {
try {
ToolRunner.run(new Configuration(), new WCRunner(), args);
} catch (Exception e) {
            logger.error("Job execution failed", e);
e.printStackTrace();
}
}
@Override
public void setConf(Configuration conf) {
conf.set("fs.defaultFS", "hdfs://node06:8020");
conf.set("mapreduce.framework.name", "yarn");
// conf.set("mapreduce.framework.name", "local");
conf.set("yarn.resourcemanager.hostname", "node08");
conf.set("ha.zookeeper.quorum", "node07,node08,node09");
conf.set("mapreduce.app-submission.cross-platform", "true");
conf.set("hbase.zookeeper.quorum", "node07,node08,node09");
this.conf = conf;
}
@Override
public Configuration getConf() {
return this.conf;
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = Job.getInstance(conf);
job.setJarByClass(WCRunner.class);
        job.setJar("date/wc_tool.jar"); // jar of the whole project, exported ahead of time
job.setMapperClass(WCMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
        TableMapReduceUtil.initTableReducerJob("wc", WCReducer.class, job,
                null, null, null, null, false); // reducer writes into the 'wc' table
FileInputFormat.addInputPath(job, new Path("/usr/wc.txt"));
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Put.class);
return job.waitForCompletion(true) ? 0 : -1;
}
}
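Before submitting WCRunner, the input file must already be in HDFS at the path hard-coded in run(), and the project jar must be exported to the path passed to setJar. A sketch with the standard HDFS CLI (wc.txt stands for whatever whitespace-separated word list you use):
hdfs dfs -mkdir -p /usr
hdfs dfs -put wc.txt /usr/wc.txt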
package com.bjsxt.hbase_2;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
        String[] split = value.toString().split("\\s+"); // split each line on whitespace
for (String str: split) {
context.write(new Text(str), new IntWritable(1));
}
}
}
package com.bjsxt.hbase_2;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WCReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable>{
@Override
protected void reduce(Text key, Iterable<IntWritable> value,
Reducer<Text, IntWritable, ImmutableBytesWritable, Mutation>.Context context)
throws IOException, InterruptedException {
        if (key != null && !key.toString().isEmpty()) { // skip empty keys
            int sum = 0;
            for (IntWritable intWritable : value) {
                sum += intWritable.get();
            }
            // row key = the word; column cf:name stores the count as a string
            Put put = new Put(key.toString().getBytes());
            put.add("cf".getBytes(), "name".getBytes(), String.valueOf(sum).getBytes());
            context.write(null, put); // TableOutputFormat ignores the key and applies the Put
}
}
}
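After this first job finishes, the per-word counts can be spot-checked from the HBase shell, for example:
hbase shell
scan 'wc'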
Read data from HBase and write it to HDFS.
package com.bjsxt.hbase_3;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
/**
 * Reads data from HBase, processes it, and writes the result to HDFS.
 * @author asus
 */
public class WCRunner2 implements Tool{
private static final Logger logger = Logger
.getLogger(WCRunner2.class);
private Configuration conf = null;
public static void main(String[] args) {
try {
ToolRunner.run(new Configuration(), new WCRunner2(), args);
} catch (Exception e) {
            logger.error("Job execution failed", e);
e.printStackTrace();
}
}
@Override
public void setConf(Configuration conf) {
conf.set("fs.defaultFS", "hdfs://node06:8020");
conf.set("mapreduce.framework.name", "yarn");
// conf.set("mapreduce.framework.name", "local");
conf.set("yarn.resourcemanager.hostname", "node08");
conf.set("ha.zookeeper.quorum", "node07,node08,node09");
conf.set("mapreduce.app-submission.cross-platform", "true");
conf.set("hbase.zookeeper.quorum", "node07,node08,node09");
this.conf = conf;
}
@Override
public Configuration getConf() {
return this.conf;
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = Job.getInstance(conf);
job.setJarByClass(WCRunner2.class);
        job.setJar("date/wc_tool2.jar"); // jar of the whole project, exported ahead of time
        Scan scans = new Scan();
        scans.setCaching(500);       // rows fetched per RPC, keeps the scan efficient
        scans.setCacheBlocks(false); // recommended off for MapReduce scans
TableMapReduceUtil.initTableMapperJob("wc", scans, WCMapper2.class, Text.class, IntWritable.class, job);
job.setReducerClass(WCReducer2.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileOutputFormat.setOutputPath(job, new Path("/usr/wc2"));
return job.waitForCompletion(true) ? 0 : -1;
}
}
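One caveat when re-running WCRunner2: FileOutputFormat fails if the output directory /usr/wc2 already exists, so remove it first with the standard HDFS CLI:
hdfs dfs -rm -r /usr/wc2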
package com.bjsxt.hbase_3;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WCReducer2 extends Reducer<Text, IntWritable, Text, IntWritable>{
@Override
protected void reduce(Text key, Iterable<IntWritable> value,
Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable intWritable: value) {
sum += intWritable.get();
}
context.write(key, new IntWritable(sum));
}
}
package com.bjsxt.hbase_3;
import java.io.IOException;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WCMapper2 extends TableMapper<Text, IntWritable>{
private Text text = new Text();
@Override
protected void map(ImmutableBytesWritable key, Result value,
Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context)
throws IOException, InterruptedException {
        // cf:name holds the count written by the first job
        String str = Bytes.toString(CellUtil.cloneValue(value.getColumnLatestCell("cf".getBytes(), "name".getBytes())));
        text.set(Bytes.toString(key.get())); // the row key is the word itself
        context.write(text, new IntWritable(Integer.valueOf(str)));
}
}
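When the job completes, the word counts are plain text part files under /usr/wc2; assuming the default single reducer, they can be read with:
hdfs dfs -cat /usr/wc2/part-r-00000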