The full listing is as follows:
package application;
import com.alibaba.fastjson.JSONObject;
import hbase.HbaseRowKeyUtil;
import operator.*;
import org.apache.commons.collections.IteratorUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer010;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import java.io.IOException;
import java.util.*;
/**
* TODO: batch-write data to HBase
*/
public class SaveDataToHbase {
public static void main(String[] args) throws Exception {
// String fileUrl = "D:\\wxgz-local\\resources_yace\\";
String fileUrl = "/zywa/job/storm/resources_new/";
// TODO 2: read the data from Kafka
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(4);
// TODO: build the Kafka configuration properties
// (note: the hard-coded args below override any real command-line arguments)
args = new String[]{"--input-topic", "topn_test", "--bootstrap.servers", "node2.hadoop:9092,node3.hadoop:9092",
"--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181", "--group.id", "cc2"};
ParameterTool parameterTool = ParameterTool.fromArgs(args);
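// NOTE: event time is enabled below, but the TumblingProcessingTimeWindows assigner
// imported above fires on processing time regardless of this setting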
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
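// two copies of the CLI properties: sendPros for the Kafka producer (sink), pros for the Kafka consumer (source)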
Properties sendPros = parameterTool.getProperties();
Properties pros = parameterTool.getProperties();
// TODO: use the Kafka topic as the input source.
// The listing is cut off mid-statement at this point; the source below is a plausible
// completion based on the imports and the properties prepared above.
DataStream<String> sourceStream = env.addSource(
        new FlinkKafkaConsumer010<>(parameterTool.getRequired("input-topic"),
                new SimpleStringSchema(), pros));
// ... the windowing and HBase batch-write logic of the original article is not
// visible in this excerpt ...
env.execute("SaveDataToHbase");
}
}
This article has shown in detail how to use Apache Flink to read real-time data from a Kafka consumer and store it in HBase, covering the configuration, the code, and the key steps. Since the batch-write step itself is cut off in the listing above, a hedged sketch of it follows.
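The following is only a minimal sketch of what the imports suggest the missing step looks like: a ProcessWindowFunction that collects each window's records and flushes them to HBase with a single Table.put(List<Put>) call. The table name "test_table", column family "cf", the JSON "id" field used as the row key, and the class name HbaseBatchWriter are all assumptions for illustration, not the author's actual schema (the project-local hbase.HbaseRowKeyUtil presumably builds row keys, but its API is not shown).

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class HbaseBatchWriter extends ProcessWindowFunction<String, String, String, TimeWindow> {
    private transient Connection connection;

    @Override
    public void open(Configuration parameters) throws Exception {
        // one heavyweight, thread-safe HBase connection per task
        connection = ConnectionFactory.createConnection(HBaseConfiguration.create());
    }

    @Override
    public void process(String key, Context context, Iterable<String> elements,
                        Collector<String> out) throws IOException {
        // turn every record in the window into a Put
        List<Put> puts = new ArrayList<>();
        for (String element : elements) {
            JSONObject json = JSONObject.parseObject(element);
            Put put = new Put(Bytes.toBytes(json.getString("id"))); // assumed row-key field
            put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("data"), Bytes.toBytes(element));
            puts.add(put);
        }
        // flush the whole window to HBase in one batched call
        try (Table table = connection.getTable(TableName.valueOf("test_table"))) {
            table.put(puts);
        }
        out.collect(key + ": wrote " + puts.size() + " rows");
    }

    @Override
    public void close() throws IOException {
        if (connection != null) {
            connection.close();
        }
    }
}

Wired into the job above, this would look roughly like the following (the 10-second window size is likewise an assumption):

sourceStream
        .keyBy(value -> JSONObject.parseObject(value).getString("id"))
        .window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
        .process(new HbaseBatchWriter());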