import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
/**
* 将Kafka中的topic为eventattendees中的数据消费到hbase中
* hbase中的表为events_db:event_attend
*/
public class EventAttendToHB {
static int num = 0; //计数器
public static void main(String[] args) {
Properties properties = new Properties();
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kb135:9092");
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,StringDeserializer.class);
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "eventattendees_group1");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
consumer.subscribe(Collections.singleton("eventattendees"));
//配置hbase信息,连接hbase数据库
Configuration conf = HBaseConfiguration.create();
conf.set(HConstants.HBASE_DIR, "hdfs://kb135:9000/hbase");
conf.set(HConstants.ZOOKEEPER_QUORUM, "kb135");
conf.set(HConstants.CLIENT_PORT_STR, "2181");
Connection connection = null;
try {
connection = ConnectionFactory.createConnection(conf);
BufferedMutatorParams bufferedMutatorParams = new BufferedMutatorParams(TableName.valueOf("events_db:event_attend"));
bufferedMutatorParams.setWriteBufferPeriodicFlushTimeoutMs(10000);//设置超时flush时间最大值
bufferedMutatorParams.writeBufferSize(10*1024*1024);//设置缓存大小flush
BufferedMutator bufferedMutator = connection.getBufferedMutator(bufferedMutatorParams) ;
ArrayList<Put> datas = new ArrayList<>();
while (true){
ConsumerRecords<String, String> poll = consumer.poll(Duration.ofMillis(100));
datas.clear(); //每次for循环前清空datas
for (ConsumerRecord<String, String> record : poll) {
String[] splits = record.value().split(",");
Put put = new Put((splits[0] + splits[1] + splits[2]).getBytes());
put.addColumn(Bytes.toBytes("euat"), Bytes.toBytes("eventid"), splits[0].getBytes());
put.addColumn("euat".getBytes(), "friendid".getBytes(),splits[1].getBytes());
put.addColumn("euat".getBytes(), "state".getBytes(),splits[2].getBytes());
datas.add(put);
}
if (datas.size() > 0){
num = num + datas.size();
bufferedMutator.mutate(datas);
}else {
System.out.println("---------num:" + num);
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
// Consume Kafka data into HBase.
// (Trailing text from the original web page, commented out so the file compiles;
//  original note: "最新推荐文章于 2024-03-13 22:45:02 发布")