0.3 Flink--Kafka2Es: Writing to Es

The previous part successfully read the data out of Kafka as JSON; this document records how that data is written into Es according to the requirements.

Requirement: the index names are YYYYMMDD-http and YYYYMMDD-dns, where the index is determined by the recv_time and log_type fields of each record; for example, a record with log_type "http" received on 2018-07-17 goes to the index 20180717-http.

Approach: Flink is similar to Flume in that data processing takes a Source and a Sink as input and output, so the Es logic only needs to be implemented in a custom EsSink.

Reference: https://ci.apache.org/projects/flink/flink-docs-release-1.5/dev/connectors/elasticsearch.html
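The connector lives in a separate artifact; assuming Flink 1.5 on Scala 2.11 (the version in the reference above), the Maven dependency would be:

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch5_2.11</artifactId>
    <version>1.5.0</version>
</dependency>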

Straight to the code.

Add the EsSink output in the main program:

package com.matthew.flink;

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.shaded.jackson2.org.yaml.snakeyaml.Yaml;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch5.ElasticsearchSink;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.util.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.InputStream;
import java.util.*;

/**
 * Created by developer on 7/16/18.
 */

public class Kafka2Es {

    private static final Logger logger = LoggerFactory.getLogger(Kafka2Es.class);

    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        InputStream inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("config.yml");
        Map<String, String> config = new Yaml().loadAs(inputStream, Map.class);
        IOUtils.closeStream(inputStream);


        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", config.get("bootstrapServer"));
        properties.setProperty("zookeeper.connect", config.get("zookeeperConnect"));
        properties.setProperty("group.id", config.get("group.id"));
        List<String> topics = Arrays.asList(config.get("topics").split(","));
//        List<String> topics = Arrays.asList(args);


        // deserialize the raw Kafka records into JSONObjects (see the previous part)
        SchemaMap schemaMap = new SchemaMap(config.get("zookeeperConnect"), config.get("avroPath"));
        FlinkKafkaConsumer010<JSONObject> kafkaConsumer =
                new FlinkKafkaConsumer010<>(topics, new CnpcByteArrayDeserializationScheme(schemaMap), properties);
        ElasticsearchSink<JSONObject> elasticsearchSink =
                FlinkElastic.getElasticsearchSink(config.get("esTransports"), config.get("esClusterName"));

        DataStream<JSONObject> stream = env.addSource(kafkaConsumer).name("KafkaConsumer");
        stream.addSink(elasticsearchSink).name("Es");
        try {
            // the job name carries the topic list, e.g. "Kafka2Es-http;dns"
            env.execute("Kafka2Es-" + String.join(";", topics));
        } catch (Exception e) {
            logger.error(e.getLocalizedMessage(), e);
        }
    }
}
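The main class reads all of its settings from a config.yml on the classpath. For reference, a minimal sketch of that file with the keys used above; every host name, path, and id here is a placeholder, not a value from the original setup:

bootstrapServer: "kafka-01:9092,kafka-02:9092"
zookeeperConnect: "zk-01:2181,zk-02:2181"
group.id: "kafka2es"
topics: "http,dns"
avroPath: "/avro/schemas"
esTransports: "es-01:9300,es-02:9300"
esClusterName: "my-es-cluster"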

The custom EsSink implementation class:

package com.matthew.flink;


import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch5.ElasticsearchSink;
import org.apache.flink.streaming.connectors.elasticsearch5.shaded.org.joda.time.DateTime;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;

import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Created by developer on 7/17/18.
 */

public class FlinkElastic {

    public static ElasticsearchSink<JSONObject> getElasticsearchSink(String esTransPorts, String clusterName) {
        ElasticsearchSink<JSONObject> esSink = null;
        Map<String, String> config = new HashMap<String, String>();

        // flush the bulk buffer after 3000 actions, or after 1 ms, whichever comes first
        config.put(ElasticsearchSink.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "3000");
        config.put(ElasticsearchSink.CONFIG_KEY_BULK_FLUSH_INTERVAL_MS, "1");
        // without cluster.name the sink cannot discover the Es nodes (see problem 1 below)
        config.put("cluster.name", clusterName);

        try {
            List<InetSocketAddress> transports = new ArrayList<InetSocketAddress>();
            // parse "host:port" pairs; the port is 9300 for the elastic TransportClient
//            transports.add(new InetSocketAddress("11.11.184.182", 9300));
            for (String s : esTransPorts.split(",")) {
                String[] transPort = s.split(":");
                transports.add(new InetSocketAddress(transPort[0], Integer.parseInt(transPort[1])));
            }
            
            ElasticsearchSinkFunction<JSONObject> indexLog = new ElasticsearchSinkFunction<JSONObject>() {
                public IndexRequest createIndexRequest(JSONObject element) {
                    // build the index name yyyyMMdd-<log_type> from recv_time and log_type,
                    // e.g. 20180717-http
                    String logType = element.getString("log_type");
                    final DateTime dateTime = new DateTime(element.getLongValue("recv_time"));
                    String indexPrefix = dateTime.toString("yyyyMMdd");
                    return Requests.indexRequest().index(indexPrefix + "-" + logType).type(logType).source(element);
                }

                @Override
                public void process(JSONObject element, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                    requestIndexer.add(createIndexRequest(element));
                }
            };
            esSink = new ElasticsearchSink<JSONObject>(config, transports, indexLog);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return esSink;
    }
}

Run it, and you can inspect the data in the es-head plugin.

Problems encountered:

1. The sample configuration in the official docs does not mention setting the Es cluster name, which left the program unable to discover the Es nodes; on top of the official example you must also configure the Es cluster name (the "cluster.name" entry above).
2. Before writing data, remember to create the mappings for the data in Es in advance, or add a detection mechanism to the program, as sketched below.
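A minimal sketch of such a detection mechanism, assuming an Es 5.x TransportClient; the cluster name, index name, and mapping body are placeholders rather than values from the original setup, and the mapping should be extended to match the real data:

package com.matthew.flink;

import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.net.InetAddress;

/**
 * Hypothetical helper: checks that the target index exists before the job
 * starts writing, and creates it with a mapping if it does not.
 */
public class EnsureIndex {

    public static void main(String[] args) throws Exception {
        Settings settings = Settings.builder()
                .put("cluster.name", "my-es-cluster")   // placeholder cluster name
                .build();
        TransportClient client = new PreBuiltTransportClient(settings)
                .addTransportAddress(new InetSocketTransportAddress(
                        InetAddress.getByName("11.11.184.182"), 9300));

        String index = "20180717-http";   // in practice built from recv_time and log_type
        boolean exists = client.admin().indices()
                .prepareExists(index).get().isExists();
        if (!exists) {
            // recv_time must be mapped as a date; extend the mapping to the real schema
            client.admin().indices().prepareCreate(index)
                    .addMapping("http",
                            "{\"properties\":{\"recv_time\":{\"type\":\"date\"}}}")
                    .get();
        }
        client.close();
    }
}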
