Writing a Kafka topic into HBase with an interface-oriented design

Using a main method directly

package kafkatohb;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

/**
 * Consumes the Kafka topic user_friends into the HBase table
 * events_db:user_friend.
 */
public class UserFriendToHBCopy {
    static int num = 0;
    public static void main(String[] args) {
        // Kafka consumer configuration
        final Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.232.211:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
//        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
//        prop.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG,"10000");

        // whether to auto-commit offsets: false = manual commit, true = auto-commit
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG,"user_friend_group1");
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singleton("userfriends"));

        // HBase configuration and connection
        Configuration conf = HBaseConfiguration.create();
        conf.set(HConstants.HBASE_DIR, "hdfs://192.168.232.211:9000/hbase");
        conf.set(HConstants.ZOOKEEPER_QUORUM, "192.168.232.211");
        conf.set(HConstants.CLIENT_PORT_STR, "2181"); // i.e. "hbase.zookeeper.property.clientPort"

        try {
            Connection connection = ConnectionFactory.createConnection(conf);
            Table userFriendTable = connection
                    .getTable(TableName.valueOf("events_db:user_friend"));

            while (true) { // keep polling for new records
                ConsumerRecords<String, String> poll = consumer.poll(100); // each poll returns one batch, not everything at once
                List<Put> datas = new ArrayList<>();
                for (ConsumerRecord<String, String> record : poll) {
                    System.out.println(record.value());
                    String[] split = record.value().split(",");
                    // sample values: split[0] = 30386403, split[1] = 30279525
                    Put put = new Put(Bytes.toBytes((split[0] + split[1]).hashCode()));
                    put.addColumn("uf".getBytes(),"userid".getBytes(),split[0].getBytes());
                    put.addColumn("uf".getBytes(),"friendid".getBytes(),split[1].getBytes());
                    datas.add(put);
                }
                num+=datas.size();
                System.out.println("----------------------------------num: " +num);
                if(datas.size()!=0) {
                    userFriendTable.put(datas);
                }
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
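
Note that enable.auto.commit is set to false above, yet the loop never commits offsets (and the auto-commit interval is therefore ignored), so a restart will re-consume from the earliest offset. A minimal sketch of committing manually after a successful batch write, reusing the consumer and userFriendTable variables from the code above (an assumption about how one might extend it, not part of the original code):

                if (!datas.isEmpty()) {
                    userFriendTable.put(datas);
                    consumer.commitSync(); // commit offsets only after the batch has reached HBase
                }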

Using an interface

Define an interface that takes the records coming from Kafka as a parameter and returns a collection of Put objects.

Different Kafka topics need different ways of assembling the Put objects, so each assembly strategy implements this interface with the Kafka records as its input (see the hypothetical second handler sketched after UserFriendHandler below).

package kafkatohb;

import org.apache.hadoop.hbase.client.Put;
import org.apache.kafka.clients.consumer.ConsumerRecords;

import java.util.List;

public interface IParseRecord {
    public List<Put> parse(ConsumerRecords<String, String> records);
}

package kafkatohb;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;


import java.util.ArrayList;
import java.util.List;

/**
 * @Author qiaozhengchao
 * @Date 2021/6/1
 * @Description
 */
public class UserFriendHandler implements IParseRecord {
    @Override
    public List<Put> parse(ConsumerRecords<String, String> records) {
        List<Put> datas = new ArrayList<>();
        for (ConsumerRecord<String, String> record : records) {
            System.out.println(record.value());
            String[] split = record.value().split(",");
            Put put = new Put(Bytes.toBytes((split[0] + split[1]).hashCode()));
            put.addColumn("uf".getBytes(),"userid".getBytes(),split[0].getBytes());
            put.addColumn("uf".getBytes(),"friendid".getBytes(),split[1].getBytes());
            datas.add(put);
        }
        return datas;
    }
}
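
Because the parsing logic is isolated behind IParseRecord, supporting another topic only means writing another handler. The sketch below is hypothetical: the "events"-style record layout id,userid,start_time and the column family "ei" are assumptions, not from the original code.

package kafkatohb;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;

import java.util.ArrayList;
import java.util.List;

// Hypothetical handler for a comma-separated "events"-style topic: id,userid,start_time
public class EventsHandler implements IParseRecord {
    @Override
    public List<Put> parse(ConsumerRecords<String, String> records) {
        List<Put> datas = new ArrayList<>();
        for (ConsumerRecord<String, String> record : records) {
            String[] split = record.value().split(",");
            Put put = new Put(Bytes.toBytes(split[0])); // event id as row key (assumption)
            put.addColumn("ei".getBytes(), "userid".getBytes(), split[1].getBytes());
            put.addColumn("ei".getBytes(), "start_time".getBytes(), split[2].getBytes());
            datas.add(put);
        }
        return datas;
    }
}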

Abstracting the write action

Writing to HBase ultimately comes down to putting rows into a table. Different tables are written differently, so the write interface only needs the target table name and the Kafka records as parameters.

package kafkatohb.write;

import org.apache.kafka.clients.consumer.ConsumerRecords;

public interface IWriter {
    public int write(String tableName, ConsumerRecords<String,String> records);
}

package kafkatohb.write;

import kafkatohb.IParseRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.kafka.clients.consumer.ConsumerRecords;

import java.io.IOException;
import java.util.List;

public class HBaseWriter implements IWriter {

    private Connection connection = null;
    // the record handler is injected as a field of HBaseWriter
    private IParseRecord handler = null;
    // the constructor stores the handler and opens the HBase Connection
    public HBaseWriter(IParseRecord handler) {
        this.handler = handler;
        // HBase configuration and connection
        Configuration conf = HBaseConfiguration.create();
        conf.set(HConstants.HBASE_DIR, "hdfs://192.168.232.211:9000/hbase");
        conf.set(HConstants.ZOOKEEPER_QUORUM, "192.168.232.211");
        conf.set(HConstants.CLIENT_PORT_STR, "2181");
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public int write(String tableName, ConsumerRecords<String, String> records) {
        try {
            Table table = connection
                    .getTable(TableName.valueOf(tableName));
            List<Put> datas = handler.parse(records);
            table.put(datas);
            table.close();
            return datas.size();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 0;
    }
}
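
The Connection opened in the constructor is never released in this version. One possible addition to HBaseWriter (an assumption, not in the original code) is a close method so callers can shut the connection down cleanly:

    // possible addition: release the underlying HBase connection when the writer is no longer needed
    public void close() {
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }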

Worker

package kafkatohb.write;

public interface IWorker {
    // targetName: name of the target HBase table to fill
    public void fillData(String targetName);
}

package kafkatohb.write;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.Collections;
import java.util.Properties;

public class Worker implements IWorker {
    KafkaConsumer<String, String> consumer;
    IWriter writer = null;
    public Worker(String topicName, String groupId, IWriter writer){
        this.writer = writer;
        final Properties prop = new Properties();
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.232.211:9092");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
//        prop.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000");
//        prop.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG,"10000");

        // whether to auto-commit offsets: false = manual commit, true = auto-commit
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
        prop.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singleton(topicName));  //"user_friends"
    }

    @Override
    public void fillData(String targetName) {
        int sum=0;
        try {
            while (true){
                ConsumerRecords<String, String> records = consumer.poll(100);
                int count = writer.write(targetName, records);
                sum+=count;
                System.out.println("处理数据量:" + sum);
                Thread.sleep(50);
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}

main

package kafkatohb;

import kafkatohb.write.HBaseWriter;
import kafkatohb.write.IWorker;
import kafkatohb.write.IWriter;
import kafkatohb.write.Worker;


/**
 * Consumes the Kafka topic user_friends into the HBase table
 * events_db:user_friend.
 */
public class UserFriendToHB {
    static int num = 0;
    public static void main(String[] args) {
        IParseRecord handlerUserFriend = new UserFriendHandler();

        IWriter hBaseWriter = new HBaseWriter(handlerUserFriend);
        IWorker worker = new Worker("userfriends","ufgroup23"
                ,hBaseWriter);
        worker.fillData("events_db:user_friend");
    }
}
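
With this structure, moving a different topic into a different table only requires a new IParseRecord implementation; the Worker and HBaseWriter are reused as-is. A hypothetical second pipeline built on the EventsHandler sketched earlier (the topic, group id and table names are assumptions):

        IParseRecord eventsHandler = new EventsHandler();
        IWriter eventsWriter = new HBaseWriter(eventsHandler);
        IWorker eventsWorker = new Worker("events", "events_group1", eventsWriter);
        eventsWorker.fillData("events_db:events");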
