Uploading from Kafka to HBase (using a main method)

1. Create the hbase package

Write the EventAttendeeshb Java class:


package nj.zb.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

/**
 * @author: 03-CJH
 * @date:2020/5/28
 * @desc:
 */
public class EventAttendeeshb {
    public static void main(String[] args){
        // Kafka consumer configuration
        Properties prop = new Properties();
        prop.put(ConsumerConfig.GROUP_ID_CONFIG,"eventattendees1");
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.48.141:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG,"30000");
        // auto-commit is disabled; offsets are committed manually after each HBase batch
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singleton("event_attendees"));

        // HBase connection configuration
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","192.168.48.141");
        conf.set("hbase.zookeeper.property.clientPort","2181");
        conf.set("hbase.rootdir","hdfs://192.168.48.141:9000/hbase");

        try {
            Connection connection = ConnectionFactory.createConnection(conf);
            Table table = connection.getTable(TableName.valueOf("events_db:event_attendees"));

            while (true){
                List<Put> datas = new ArrayList<>();
                ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.value());
                    // each record is a CSV line: eventid,friendid,status
                    String[] info = record.value().split(",");
                    // row key: hash of the concatenated fields, so a replayed record overwrites the same row
                    Put put = new Put(Bytes.toBytes((info[0] + info[1] + info[2]).hashCode()));
                    put.addColumn("euat".getBytes(),"eventid".getBytes(),info[0].getBytes());
                    put.addColumn("euat".getBytes(),"friendid".getBytes(),info[1].getBytes());
                    put.addColumn("euat".getBytes(),"status".getBytes(),info[2].getBytes());
                    datas.add(put);
                }
                table.put(datas);
                // commit offsets only after the batch has been written to HBase
                consumer.commitAsync();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
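As a quick sanity check (not part of the original class), you can read a few rows back from events_db:event_attendees with a plain Scan. A minimal sketch: the class name EventAttendeesScanCheck is made up here, and it simply reuses the same ZooKeeper settings as the consumer above.

package nj.zb.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class EventAttendeesScanCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.48.141");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("events_db:event_attendees"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            int printed = 0;
            for (Result result : scanner) {
                // read back the three columns written by EventAttendeeshb
                String eventid  = Bytes.toString(result.getValue(Bytes.toBytes("euat"), Bytes.toBytes("eventid")));
                String friendid = Bytes.toString(result.getValue(Bytes.toBytes("euat"), Bytes.toBytes("friendid")));
                String status   = Bytes.toString(result.getValue(Bytes.toBytes("euat"), Bytes.toBytes("status")));
                System.out.println(eventid + "," + friendid + "," + status);
                if (++printed >= 5) {   // only a handful of rows for a quick look
                    break;
                }
            }
        }
    }
}

Run it after the consumer has processed some records; it prints at most five rows and exits.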

Write the UserFriendshb Java class:

package nj.zb.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

/**
 * @author: 03-CJH
 * @date:2020/5/28
 * @desc:
 */
public class UserFriendshb {
    public static void main(String[] args){
        Properties prop = new Properties();
        prop.put(ConsumerConfig.GROUP_ID_CONFIG,"userfriends1");
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.48.141:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,StringDeserializer.class);
        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG,"30000");
        // auto-commit is disabled; offsets are committed manually after each HBase batch
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);

        consumer.subscribe(Collections.singleton("user_friends"));

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","192.168.48.141");
        conf.set("hbase.zookeeper.property.clientPort","2181");
        conf.set("hbase.rootdir","hdfs://192.168.48.141:9000/hbase");

        try {
            Connection connection = ConnectionFactory.createConnection(conf);
            Table table = connection.getTable(TableName.valueOf("events_db:user_friends"));

            while (true){
                ConsumerRecords<String, String> records = consumer.poll(100);
                List<Put> datas = new ArrayList<>();
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.value());
                    // each record is a CSV line: userid,friendid
                    String[] info = record.value().split(",");
                    // row key: hash of userid + friendid, so a replayed record overwrites the same row
                    Put put = new Put(Bytes.toBytes((info[0]+info[1]).hashCode()));
                    put.addColumn("uf".getBytes(),"userid".getBytes(),info[0].getBytes());
                    put.addColumn("uf".getBytes(),"friendid".getBytes(),info[1].getBytes());
                    datas.add(put);
                }
                table.put(datas);
                // commit offsets only after the batch has been written to HBase
                consumer.commitAsync();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
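Because the row key is Bytes.toBytes((userid + friendid).hashCode()), you can rebuild it and fetch a single row back with a Get. A minimal sketch, assuming the same connection settings as above; the class name UserFriendGetCheck and the sample userId/friendId values are placeholders, not values from the original data.

package nj.zb.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class UserFriendGetCheck {
    public static void main(String[] args) throws Exception {
        String userId = "1234";    // placeholder: use a pair printed by UserFriendshb
        String friendId = "5678";  // placeholder

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.48.141");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("events_db:user_friends"))) {
            // rebuild the same hashCode-based row key that the writer used
            Get get = new Get(Bytes.toBytes((userId + friendId).hashCode()));
            Result result = table.get(get);
            if (result.isEmpty()) {
                System.out.println("no row found for " + userId + "," + friendId);
            } else {
                System.out.println("userid   = " + Bytes.toString(result.getValue(Bytes.toBytes("uf"), Bytes.toBytes("userid"))));
                System.out.println("friendid = " + Bytes.toString(result.getValue(Bytes.toBytes("uf"), Bytes.toBytes("friendid"))));
            }
        }
    }
}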

2. Run

Run EventAttendeeshb, then run UserFriendshb the same way.

3. Check the results in Xshell

Note: the Hadoop, ZooKeeper, HBase, and Kafka services need to be started first.

# hadoop
start-all.sh

# zookeeper
zkServer.sh start

# hbase
start-hbase.sh

# kafka (-daemon runs the broker in the background)
kafka-server-start.sh -daemon /opt/bigdata/kafka211/config/server.properties 
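Optionally, you can confirm from Java that the broker at 192.168.48.141:9092 is reachable and that the two topics exist before starting the consumers. A minimal sketch using the kafka-clients AdminClient; the class name KafkaTopicCheck is made up here.

package nj.zb.hbase;

import java.util.Properties;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;

public class KafkaTopicCheck {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.48.141:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            // print all topic names; event_attendees and user_friends should be among them
            System.out.println(admin.listTopics().names().get());
        }
    }
}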

Enter HBase and check the namespace and tables.

Note: of course, first make sure this namespace and these tables actually exist in HBase.

# enter the hbase shell
[root@cjh1 ~]# hbase shell
hbase> list_namespace

# create them if they do not exist
hbase> create_namespace 'events_db'
hbase> create 'events_db:user_friends','uf'
hbase> create 'events_db:event_attendees','euat'
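If you prefer to create the namespace and tables from Java instead of the hbase shell, the sketch below uses the HBase Admin API. The class name CreateEventsDbTables is made up; HTableDescriptor and HColumnDescriptor are the older client classes (still available on HBase 1.x and 2.x, though deprecated on 2.x).

package nj.zb.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceNotFoundException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateEventsDbTables {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.48.141");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            // create the events_db namespace if it is missing
            try {
                admin.getNamespaceDescriptor("events_db");
            } catch (NamespaceNotFoundException e) {
                admin.createNamespace(NamespaceDescriptor.create("events_db").build());
            }
            // create each table with its single column family if it is missing
            createIfMissing(admin, "events_db:user_friends", "uf");
            createIfMissing(admin, "events_db:event_attendees", "euat");
        }
    }

    private static void createIfMissing(Admin admin, String table, String family) throws Exception {
        TableName name = TableName.valueOf(table);
        if (!admin.tableExists(name)) {
            HTableDescriptor desc = new HTableDescriptor(name);
            desc.addFamily(new HColumnDescriptor(family));
            admin.createTable(desc);
        }
    }
}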

Then count the rows in HBase to verify the data.

Note: the count command in the hbase shell runs very slowly, so I recommend the following command instead:

[root@cjh1 ~]# hbase org.apache.hadoop.hbase.mapreduce.RowCounter 'events_db:user_friends'
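If you would rather count from Java than launch the RowCounter MapReduce job, a client-side Scan with FirstKeyOnlyFilter also works (single-threaded, so slower on very large tables). A sketch assuming the same cluster settings; RowCountCheck is a made-up class name.

package nj.zb.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;

public class RowCountCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.48.141");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("events_db:user_friends"))) {
            Scan scan = new Scan();
            scan.setFilter(new FirstKeyOnlyFilter()); // return only the first cell of each row
            scan.setCaching(1000);                    // fetch rows from the region servers in larger batches
            long count = 0;
            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result result : scanner) {
                    count++;
                }
            }
            System.out.println("events_db:user_friends rows: " + count);
        }
    }
}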


Note: that completes uploading from Kafka to HBase using a main method!

Note: your support is appreciated; more articles are on the way…
