Kafka Part 3: Uploading Kafka Data to HBase

1: Create the tables in HBase

  • Create the namespace
create_namespace 'events_db'
  • List the namespaces
list_namespace
  • Create a table in this namespace with column family euot
create 'events_db:event_attendees','euot'
  • Create a table in this namespace with column family of
create 'events_db:user_friends','of'
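  • Optionally, verify that the tables and column families were created as expected
describe 'events_db:event_attendees'
describe 'events_db:user_friends'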

2: Upload to HBase

  • Upload the cleaned topic data from Kafka to HBase

Example 1

package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Properties;

/**
 * @Description
 * Import the event_attendees_raw data from the Kafka topic into the HBase events_db:event_attendees table
 * @Author WuYou
 * @date 2020/5/27
 */
public class EventAttendsHB {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.101.130:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG,"30000");
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");        // offsets are committed manually after each batch is written
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
        prop.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,"1000");    // has no effect while auto commit is disabled
        prop.put(ConsumerConfig.GROUP_ID_CONFIG,"eventattendees3");
        final KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);

        consumer.subscribe(Collections.singleton("event_attendees1"));

        // HBase client configuration
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","192.168.101.130");
        conf.set("hbase.zookeeper.property.clientPort","2181");
        conf.set("hbase.rootdir","hdfs://192.168.101.130:9000/hbase");

        try {
            final Connection connection = ConnectionFactory.createConnection(conf);
            final Table table = connection.getTable(TableName.valueOf("events_db:event_attendees"));

            while (true){
                ArrayList<Put> datas = new ArrayList<>();
                final ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.value());
                    // each record value is "eventid,friendid,stat"
                    final String[] info = record.value().split(",");
                    final Put put = new Put(Bytes.toBytes((info[0] + info[1] + info[2]).hashCode()));
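                    // note: the row key above is the 32-bit hashCode of the concatenated fields, so
                    // distinct records can collide and overwrite one another; using the raw
                    // concatenated string as the row key would avoid that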
                    put.addColumn("euot".getBytes(),"eventid".getBytes(),info[0].getBytes());
                    put.addColumn("euot".getBytes(),"friendid".getBytes(),info[1].getBytes());
                    put.addColumn("euot".getBytes(),"stat".getBytes(),info[2].getBytes());
                    datas.add(put);
                }
                table.put(datas);
                consumer.commitAsync();   // commit offsets only after the batch has been written to HBase
                // the table and connection stay open for subsequent batches
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Example 2

package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

/**
 * @Description
 * Import the user_friends data from the Kafka topic into the HBase events_db:user_friends table
 */
public class UserFriendsHB {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.101.130:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG,"30000");
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
        prop.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,"1000");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG,"userfriends2");
        final KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singleton("user_friends1"));

        // HBase client configuration
        final Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","192.168.101.130");
        conf.set("hbase.zookeeper.property.clientPort","2181");
        conf.set("hbase.rootdir","hdfs://192.168.101.130:9000/hbase");

        try {
            final Connection connection = ConnectionFactory.createConnection(conf);
            final Table table = connection.getTable(TableName.valueOf("events_db:user_friends"));
            while (true){
                final ConsumerRecords<String, String> records = consumer.poll(100);
                List<Put> datas = new ArrayList<>();
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.value());
                    // each record value is "userid,friendid"
                    final String[] infos = record.value().split(",");
                    final Put put = new Put(Bytes.toBytes((infos[0] + infos[1]).hashCode()));
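                    // same caveat as in Example 1: a hashCode-based row key can collide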
                    put.addColumn("of".getBytes(),"userid".getBytes(),infos[0].getBytes());
                    put.addColumn("of".getBytes(),"friendid".getBytes(),infos[1].getBytes());
                    datas.add(put);
                }
                table.put(datas);
                consumer.commitAsync();   // commit offsets only after the batch has been written to HBase
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

3: Verify the data was uploaded to HBase

  • Slow way: count the rows from within the hbase shell
hbase(main):015:0> count 'events_db:event_attendees' , INTERVAL => '100000'
  • Fast way: count the rows with the Hadoop MapReduce RowCounter job (note: this is not run inside the hbase shell; run it from an ordinary shell prompt)
[root@cai bin]# hbase org.apache.hadoop.hbase.mapreduce.RowCounter 'events_db:event_attendees'
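  • A programmatic spot check is also possible with the HBase Java client. The sketch below is only illustrative (the class name EventAttendeesCheck is made up here, and the connection settings are assumed to match the examples above); it simply prints the first few rows of the target table.

package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;

import java.io.IOException;

public class EventAttendeesCheck {
    public static void main(String[] args) throws IOException {
        // same HBase client settings as in the examples above
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.101.130");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("events_db:event_attendees"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            int shown = 0;
            for (Result result : scanner) {
                System.out.println(result);   // prints the row key and its cells
                if (++shown >= 5) {           // only spot-check the first few rows
                    break;
                }
            }
        }
    }
}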

4: Notes

  • Errors sometimes occur because the virtual machine was allocated too little memory or disk space; check the disk usage first
[root@cai ~]# df -h

Filesystem               Size  Used Avail Use% Mounted on
devtmpfs                 7.8G     0  7.8G   0% /dev
tmpfs                    7.8G     0  7.8G   0% /dev/shm
tmpfs                    7.8G   12M  7.8G   1% /run
tmpfs                    7.8G     0  7.8G   0% /sys/fs/cgroup
/dev/mapper/centos-root   47G   25G   23G  53% /
/dev/sda1               1014M  150M  865M  15% /boot
tmpfs                    1.6G     0  1.6G   0% /run/user/0
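
Since df -h only reports disk usage, available memory can be checked separately, for example:
[root@cai ~]# free -h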

If either is running low, reallocate the virtual machine's memory and disk size.
Alternatively, an additional disk can be mounted directly on the VM; the steps are easy to find online and are not covered here.
