从Kafka消费到数据的转换、入库(一)

采用的数据处理引擎与入库组件

处理引擎:Flink
持久化组件:HBase、HDFS、MySQL
gradle依赖:


// Build-script classpath: tells Gradle where to find the plugins used by the
// build itself (not the project's own dependencies).
buildscript {
    repositories {
        // JCenter has been sunset and jcenter() is deprecated in current Gradle
        // releases; the Gradle Plugin Portal serves the Shadow plugin instead.
        gradlePluginPortal() // this applies only to the Gradle 'Shadow' plugin
    }
    dependencies {
        classpath 'com.github.jengelman.gradle.plugins:shadow:5.2.0'
    }
}

// Plugins applied to this build.
plugins {
   
    // Java compilation and packaging
    id 'java'
    // runnable-application support (main class, 'run' task, default JVM args)
    id 'application'
    // shadow plugin to produce fat JARs
    id 'com.github.johnrengelman.shadow' version '5.2.0'
}


// artifact properties
group = 'com.ryff.log'
version = '0.1-SNAPSHOT'
// fully-qualified entry-point class of the job
mainClassName = 'com.ryff.log.KafkaConsumer'
description = """RYFF Flink Job"""

// Shared version numbers, referenced by name in the rest of this script.
// NOTE(review): slf4jVersion is not referenced anywhere in the visible script.
ext {
   
    javaVersion = '1.8'
    flinkVersion = '1.11.2'
    // Scala suffix appended to the Flink artifact names (e.g. flink-clients_2.11)
    scalaBinaryVersion = '2.11'
    hiveVersion= '2.1.1'
    slf4jVersion = '1.7.7'
    log4jVersion = '1.2.17'
}


// Compile for, and emit bytecode for, the Java version declared above.
sourceCompatibility = javaVersion
targetCompatibility = javaVersion
// Read every Java source file as UTF-8 regardless of the platform default.
tasks.withType(JavaCompile) {
   
    options.encoding = 'UTF-8'
}

// Default JVM flags for the application: point log4j at log4j.properties.
applicationDefaultJvmArgs = ["-Dlog4j.configuration=log4j.properties"]


// Repositories used to resolve the project's dependencies, consulted in the
// order they are declared here.
repositories {
    // Aliyun public mirror. Served over HTTPS: recent Gradle versions refuse
    // plain-HTTP repositories unless explicitly allowed, and HTTP downloads can
    // be tampered with in transit.
    maven {
        url 'https://maven.aliyun.com/repository/public/'
    }
    mavenCentral()
    // Apache snapshot builds
    maven {
        url "https://repository.apache.org/content/repositories/snapshots/"
    }
    // Pentaho proxy of third-party releases
    maven {
        url "https://public.nexus.pentaho.org/repository/proxy-public-3rd-party-release/"
    }
}


// Custom dependency configuration: everything added to flinkShadowJar is what
// the shadowJar task packs into the fat JAR (see the shadowJar block).
configurations {
   
    flinkShadowJar 
    // Keep these out of the fat JAR even when pulled in transitively.
    flinkShadowJar.exclude group: 'org.apache.flink', module: 'force-shading'
    flinkShadowJar.exclude group: 'com.google.code.findbugs', module: 'jsr305'
    // logging libraries are excluded from the bundled dependencies
    flinkShadowJar.exclude group: 'org.slf4j'
    flinkShadowJar.exclude group: 'log4j'
}

// Project dependencies.
//  - 'implementation' entries are on the compile/run classpath but are NOT
//    bundled, because shadowJar only packs the flinkShadowJar configuration.
//  - 'flinkShadowJar' entries are bundled into the fat JAR.
dependencies {
    implementation "log4j:log4j:${log4jVersion}"
    implementation "org.apache.flink:flink-java:${flinkVersion}"
    implementation "org.apache.flink:flink-streaming-java_${scalaBinaryVersion}:${flinkVersion}"
    implementation "org.apache.flink:flink-clients_${scalaBinaryVersion}:${flinkVersion}"

    flinkShadowJar "org.apache.flink:flink-connector-kafka_${scalaBinaryVersion}:${flinkVersion}"
    flinkShadowJar "org.apache.flink:flink-connector-hive_${scalaBinaryVersion}:${flinkVersion}"
    flinkShadowJar "org.apache.flink:flink-table-api-java-bridge_${scalaBinaryVersion}:${flinkVersion}"
    // Exclude SLF4J by group: there is no module literally named 'slf4j'
    // (the artifacts are slf4j-api, slf4j-log4j12, ...), so a module-name
    // exclusion of 'slf4j' matches nothing.
    flinkShadowJar("org.apache.hive:hive-exec:${hiveVersion}") {
        exclude group: 'org.slf4j'
    }
    flinkShadowJar "org.apache.flink:flink-connector-filesystem_${scalaBinaryVersion}:${flinkVersion}"
    // fastjson releases before 1.2.83 have known autoType deserialization
    // vulnerabilities; this job parses messages read from Kafka, so use the
    // patched 1.x release.
    flinkShadowJar "com.alibaba:fastjson:1.2.83"
    flinkShadowJar "mysql:mysql-connector-java:8.0.11"
    flinkShadowJar "org.apache.hbase:hbase-client:2.1.5"
    flinkShadowJar "org.roaringbitmap:RoaringBitmap:0.8.0"
}

// flinkShadowJar is a standalone configuration, so its dependencies have to be
// added to each classpath explicitly.
sourceSets {
   
    // main sources: compile against and run with the bundled dependencies
    main.compileClasspath += configurations.flinkShadowJar
    main.runtimeClasspath += configurations.flinkShadowJar

    // tests see the same dependencies
    test.compileClasspath += configurations.flinkShadowJar
    test.runtimeClasspath += configurations.flinkShadowJar

    // javadoc generation resolves types from them as well
    javadoc.classpath += configurations.flinkShadowJar
}

// the 'run' task uses the main runtime classpath extended above
run.classpath = sourceSets.main.runtimeClasspath

// Manifest entries written into the JAR.
jar {
   
    manifest {
   
        // record who built the artifact and the JVM version running the build
        attributes 'Built-By': System.getProperty('user.name'),
                'Build-Jdk': System.getProperty('java.version')
    }
}


// Fat-JAR settings for the Shadow plugin.
shadowJar {
   
    // bundle only the dependencies declared under flinkShadowJar
    configurations = [project.configurations.flinkShadowJar]
    // enable the zip64 archive format for the resulting JAR
    zip64 true
}

基于Flink消费Kafka

import com.alibaba.fastjson.JSONObject;
import com.ryff.common.*;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.*;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.roaringbitmap.RoaringBitmap;

import java.math.BigDecimal;
import java.time.ZoneId;
import java.util.Properties;

public class KafkaConsumer {
   
    public static void main(String[] args) throws Exception {
   
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		//这里只是进行了简单的配置,有需要可以添加相应的其它配置
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "xxx.master:9092,xxx.server1:9092,xxx.server2:9092");
        properties.setProperty("group.id", "group1");
        //构建FlinkKafkaConsumer
        FlinkKafkaConsumer<String> myConsumer = new FlinkKafkaConsumer<>("ryffEventLog", new SimpleStringSchema(), properties);
        myConsumer.setStartFromLatest();
        DataStreamSource<String> stream = env.addSource(myConsumer);
        env.enableCheckpointing(50000);
        final OutputTag<JSONObject> createDeviceTag = new OutputTag<JSONObject>("createDeviceTag"){
   };
        final OutputTag<JSONObject> createUserTag = new OutputTag<JSONObject>("createUserTag"){
   };
        final OutputTag<JSONObject> createRoleTag = new OutputTag<JSONObject>("createRoleTag")
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值