Flink: Writing Result Data into Elasticsearch

I. Requirement

Process the data with Flink streaming and sink the results into Elasticsearch.

II. Software Versions

Flink 1.8.0

Elasticsearch 7.8.1

kafka_2.11-1.0.0

Java 1.8

Note that Flink 1.8.0 only ships an elasticsearch6 connector (a dedicated elasticsearch7 connector was only added in a later Flink release). Because the connector talks to the cluster over the REST API, it generally works against a 7.x cluster as well, which is why flink-connector-elasticsearch6 appears in the pom below.

III. Code

1. Maven pom file

<properties>
    <compiler.version>1.8</compiler.version>
    <flink.version>1.8.0</flink.version>
    <java.version>1.8</java.version>
    <hadoop.version>2.7.5</hadoop.version>
    <scala.binary.version>2.11</scala.binary.version>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
        <scope>provided</scope>
    </dependency>
    <!-- The following two dependencies are needed for Flink's logs to show up -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>1.7.25</version>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-simple</artifactId>
        <version>1.7.25</version>
    </dependency>
    <!-- For converting between objects and JSON -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.44</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <!-- MySQL connector dependency -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.38</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-elasticsearch6_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.5</version>
    </dependency>
</dependencies>

<!-- This profile helps to make things run out of the box in IntelliJ -->
<!-- It adds Flink's core classes to the runtime classpath. -->
<!-- Otherwise they are missing in IntelliJ, because the dependency is 'provided' -->
<profiles>
    <profile>
        <id>add-dependencies-for-IDEA</id>

        <activation>
            <property>
                <name>idea.version</name>
            </property>
        </activation>

        <dependencies>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
                <scope>compile</scope>
            </dependency>
        </dependencies>
    </profile>
</profiles>
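
Because flink-streaming-java is marked provided, running the job on a cluster requires packaging it as a fat jar. A minimal maven-shade-plugin sketch (the main class matches the job below; the plugin version is an assumption, adjust as needed):

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>3.2.1</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <transformers>
                            <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                <mainClass>com.felix.kafka.KafkaMain</mainClass>
                            </transformer>
                        </transformers>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>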

2. Job code

package com.felix.kafka;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.felix.model.SecureLog;
import com.felix.utils.CommonUtils;
import com.felix.utils.DateUtils;
import com.felix.utils.HttpUtils;
import com.felix.utils.PropertyUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.message.BasicHeader;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

import javax.annotation.Nullable;
import java.util.*;

public class KafkaMain {

    public static void main(String[] args) throws Exception{
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Use event-time semantics
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1);

        // Fault tolerance: trigger a checkpoint every 10s (disabled below; uncomment to enable)

//        env.enableCheckpointing(10000);
//
//        // Checkpoint in exactly-once mode
//        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
//        // Minimum pause between two checkpoints
//        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(5000);
//        // Checkpoint timeout
//        env.getCheckpointConfig().setCheckpointTimeout(60000);
//        // At most one checkpoint in flight at a time
//        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
//        // Retain externalized checkpoints when the job is cancelled
//        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
//
//        // Checkpoint storage location
//        env.setStateBackend(new FsStateBackend("hdfs://node01:8020/flink-checkpoint/"));


        Properties props = PropertyUtils.getPropertiesWithKafkaConfig();
        DataStreamSource<String> dataStreamSource = env.addSource(new FlinkKafkaConsumer011<>(
                PropertyUtils.get("input.topic"),  // Kafka topic
                new SimpleStringSchema(),  // String deserialization
                props)).setParallelism(1);

     //   dataStreamSource.print(); // print the records read from Kafka to the console

        DataStream<SecureLog> secureLogStream = dataStreamSource.map(new MapFunction<String, SecureLog>() {
            @Override
            public SecureLog map(String msgJson) throws Exception {
                SecureLog secureLog = new SecureLog();
                JSONObject jsonObject = JSON.parseObject(msgJson);
                secureLog.setSyslogProgram(jsonObject.getString("syslog_program"));
                secureLog.setPath(jsonObject.getString("path"));
                secureLog.setSyslogTimestamp(jsonObject.getString("syslog_timestamp"));
                secureLog.setTimestamp(jsonObject.getString("@timestamp"));
                secureLog.setSyslogHostname(jsonObject.getString("syslog_hostname"));
                secureLog.setVersion(jsonObject.getString("@version"));
                secureLog.setMessage(jsonObject.getString("message"));
                secureLog.setHost(jsonObject.getString("host"));
                secureLog.setReceivedFrom(jsonObject.getString("received_from"));
                secureLog.setSyslogPid(jsonObject.getString("syslog_pid"));
                secureLog.setReceivedAt(jsonObject.getString("received_at"));
                secureLog.setSyslogMessage(jsonObject.getString("syslog_message"));
                secureLog.setType(jsonObject.getString("type"));
                secureLog.setSysLogTimestampLong(DateUtils.dateStrToLong(secureLog.getSyslogTimestamp()));
                return secureLog;
            }
        });

//        secureLogStream.print();

        // Assign event-time timestamps and watermarks
        DataStream<SecureLog> secureLogDataStreamWithWaterMask = secureLogStream.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<SecureLog>() {
            private long currentTimeStamp = 0L;
            // Maximum allowed out-of-orderness in milliseconds
            private final long maxDelayTime = 2000L;

            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                return new Watermark(currentTimeStamp - maxDelayTime);
            }

            @Override
            public long extractTimestamp(SecureLog secureLog, long previousElementTimestamp) {
                // Advance the watermark clock as elements arrive (the original never
                // updated currentTimeStamp, so the watermark could not progress);
                // this mirrors Flink's built-in BoundedOutOfOrdernessTimestampExtractor
                currentTimeStamp = Math.max(secureLog.getSysLogTimestampLong(), currentTimeStamp);
                return secureLog.getSysLogTimestampLong();
            }
        });

//        secureLogDataStreamWithWaterMask.print();


        // Sink to MySQL (disabled)
//        secureLogDataStreamWithWaterMask.addSink(new MySqlSink());


        // Sink to Elasticsearch
        List<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost("192.168.126.128", 9200, "http"));
        httpHosts.add(new HttpHost("192.168.126.129", 9200, "http"));
        httpHosts.add(new HttpHost("192.168.126.130", 9200, "http"));


        // use an ElasticsearchSink.Builder to create an ElasticsearchSink
        ElasticsearchSink.Builder<SecureLog> esSinkBuilder = new ElasticsearchSink.Builder<SecureLog>(
                httpHosts,
                new ElasticsearchSinkFunction<SecureLog>() {
                    public IndexRequest createIndexRequest(SecureLog secureLog) {
//                        Map<String, SecureLog> json = new HashMap<>();
//                        json.put("data", secureLog);
                        Map<String, Object> map = CommonUtils.beanToMap(secureLog);

                        return Requests.indexRequest()
                                .index("flink_secure_log")
                                .type("_doc")
                                .source(map);
                    }

                    @Override
                    public void process(SecureLog secureLog, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                        requestIndexer.add(createIndexRequest(secureLog));
                    }
                }
        );
        // Flush after every single element (no batching; handy for testing, raise this in production)
        esSinkBuilder.setBulkFlushMaxActions(1);

        // provide a RestClientFactory for custom configuration on the internally created REST client
        Header[] defaultHeaders = new Header[]{new BasicHeader("Authorization", HttpUtils.getHeader())};
        esSinkBuilder.setRestClientFactory(
                restClientBuilder -> {
                    restClientBuilder.setDefaultHeaders(defaultHeaders);
                }
        );

        secureLogDataStreamWithWaterMask.print();
        secureLogDataStreamWithWaterMask.addSink(esSinkBuilder.build());

        env.execute("Flink add data source");
    }




}
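
The helper classes referenced above (SecureLog, DateUtils, CommonUtils, PropertyUtils) are not shown in the post. Below is a minimal sketch of what they might look like: the field names are inferred from the map function, while the property file name, the date pattern, and the fastjson-based beanToMap are assumptions. The classes are shown together for brevity; in the project they live in com.felix.model and com.felix.utils.

package com.felix.model;

// SecureLog: a plain POJO mirroring the JSON fields of the Kafka message
// (the remaining getters/setters follow the same pattern and are elided).
public class SecureLog {
    private String syslogProgram;
    private String path;
    private String syslogTimestamp;
    private String timestamp;
    private String syslogHostname;
    private String version;
    private String message;
    private String host;
    private String receivedFrom;
    private String syslogPid;
    private String receivedAt;
    private String syslogMessage;
    private String type;
    private Long sysLogTimestampLong;

    public String getSyslogTimestamp() { return syslogTimestamp; }
    public void setSyslogTimestamp(String syslogTimestamp) { this.syslogTimestamp = syslogTimestamp; }
    public Long getSysLogTimestampLong() { return sysLogTimestampLong; }
    public void setSysLogTimestampLong(Long sysLogTimestampLong) { this.sysLogTimestampLong = sysLogTimestampLong; }
    // ... getters/setters for the other fields
}

package com.felix.utils;

import com.alibaba.fastjson.JSON;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;

public class DateUtils {
    // Parses the syslog timestamp (e.g. "Aug 3 14:07:01") into epoch millis.
    // Classic syslog timestamps carry no year, so a real implementation would
    // inject the current year; the pattern here is an assumption.
    public static Long dateStrToLong(String dateStr) {
        try {
            SimpleDateFormat sdf = new SimpleDateFormat("MMM d HH:mm:ss", Locale.ENGLISH);
            return sdf.parse(dateStr).getTime();
        } catch (ParseException e) {
            return 0L; // don't let one bad record kill the job
        }
    }
}

public class CommonUtils {
    // Converts a bean into a Map for IndexRequest.source(), via fastjson
    // (already a dependency of the project).
    public static Map<String, Object> beanToMap(Object bean) {
        return JSON.parseObject(JSON.toJSONString(bean));
    }
}

public class PropertyUtils {
    private static final Properties PROPS = new Properties();

    static {
        try {
            // kafka.properties on the classpath holds bootstrap.servers,
            // group.id, input.topic, etc. (the file name is an assumption)
            PROPS.load(PropertyUtils.class.getClassLoader()
                    .getResourceAsStream("kafka.properties"));
        } catch (Exception e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    public static Properties getPropertiesWithKafkaConfig() { return PROPS; }

    public static String get(String key) { return PROPS.getProperty(key); }
}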

3. If Elasticsearch is secured with a username and password, an Authorization header must be added to the HTTP requests when connecting:

package com.felix.utils;

import org.apache.commons.codec.binary.Base64;

import java.nio.charset.StandardCharsets;

public class HttpUtils {

    private static final String APP_KEY = "elastic"; // ES username
    private static final String SECRET_KEY = "123456"; // ES password

    /**
     * Builds the Basic Auth header value.
     *
     * @return the value for the HTTP Authorization header
     */
    public static String getHeader() {
        String auth = APP_KEY + ":" + SECRET_KEY;
        byte[] encodedAuth = Base64.encodeBase64(auth.getBytes(StandardCharsets.US_ASCII));
        return "Basic " + new String(encodedAuth, StandardCharsets.US_ASCII);
    }


}
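
As an alternative to injecting a ready-made Authorization header, the same credentials can be handed to the REST client through an HttpClientConfigCallback. A sketch using the httpclient classes that are already on the classpath:

import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;

// Configure Basic Auth on the internally created REST client instead of
// setting a default Authorization header.
esSinkBuilder.setRestClientFactory(restClientBuilder -> {
    CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
    credentialsProvider.setCredentials(AuthScope.ANY,
            new UsernamePasswordCredentials("elastic", "123456"));
    restClientBuilder.setHttpClientConfigCallback(httpClientBuilder ->
            httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider));
});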