I. Requirement
Use Flink stream processing and write the result data into Elasticsearch.
II. Software Versions
Flink 1.8.0
Elasticsearch 7.8.1
kafka_2.11-1.0.0
Java 1.8
III. Code
1. Maven pom file
<properties>
    <compiler.version>1.8</compiler.version>
    <flink.version>1.8.0</flink.version>
    <java.version>1.8</java.version>
    <hadoop.version>2.7.5</hadoop.version>
    <scala.binary.version>2.11</scala.binary.version>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
        <scope>provided</scope>
    </dependency>
    <!-- The following two dependencies are needed for Flink's log output to show up -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>1.7.25</version>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-simple</artifactId>
        <version>1.7.25</version>
    </dependency>
    <!-- Conversion between objects and JSON -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.44</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <!-- MySQL connector dependency -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.38</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-elasticsearch6_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.5</version>
    </dependency>
</dependencies>

<!-- This profile helps to make things run out of the box in IntelliJ. -->
<!-- It adds Flink's core classes to the runtime class path. -->
<!-- Otherwise they are missing in IntelliJ, because the dependency is 'provided'. -->
<profiles>
    <profile>
        <id>add-dependencies-for-IDEA</id>
        <activation>
            <property>
                <name>idea.version</name>
            </property>
        </activation>
        <dependencies>
            <dependency>
                <groupId>org.apache.flink</groupId>
                <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
                <version>${flink.version}</version>
                <scope>compile</scope>
            </dependency>
        </dependencies>
    </profile>
</profiles>
2. Job code
package com.felix.kafka;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.felix.model.SecureLog;
import com.felix.utils.CommonUtils;
import com.felix.utils.DateUtils;
import com.felix.utils.HttpUtils;
import com.felix.utils.PropertyUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.http.Header;
import org.apache.http.HttpHost;
import org.apache.http.message.BasicHeader;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import javax.annotation.Nullable;
import java.util.*;
public class KafkaMain {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Use event-time semantics
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1);

        // Trigger a checkpoint every 10 s
        // env.enableCheckpointing(10000);
        //
        // // Exactly-once checkpointing semantics
        // env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // // Minimum pause between two checkpoints
        // env.getCheckpointConfig().setMinPauseBetweenCheckpoints(5000);
        // // Checkpoint timeout
        // env.getCheckpointConfig().setCheckpointTimeout(60000);
        // // At most one checkpoint in flight at a time
        // env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        // // Retain externalized checkpoints when the job is cancelled
        // env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        //
        // // Where checkpoints are stored
        // env.setStateBackend(new FsStateBackend("hdfs://node01:8020/flink-checkpoint/"));

        Properties props = PropertyUtils.getPropertiesWithKafkaConfig();
        DataStreamSource<String> dataStreamSource = env.addSource(new FlinkKafkaConsumer011<>(
                PropertyUtils.get("input.topic"), // Kafka topic
                new SimpleStringSchema(),         // deserialize records as plain strings
                props)).setParallelism(1);
        // dataStreamSource.print(); // print the records read from Kafka to the console

        DataStream<SecureLog> secureLogStream = dataStreamSource.map(new MapFunction<String, SecureLog>() {
            @Override
            public SecureLog map(String msgJson) throws Exception {
                SecureLog secureLog = new SecureLog();
                JSONObject jsonObject = JSON.parseObject(msgJson);
                secureLog.setSyslogProgram(jsonObject.getString("syslog_program"));
                secureLog.setPath(jsonObject.getString("path"));
                secureLog.setSyslogTimestamp(jsonObject.getString("syslog_timestamp"));
                secureLog.setTimestamp(jsonObject.getString("@timestamp"));
                secureLog.setSyslogHostname(jsonObject.getString("syslog_hostname"));
                secureLog.setVersion(jsonObject.getString("@version"));
                secureLog.setMessage(jsonObject.getString("message"));
                secureLog.setHost(jsonObject.getString("host"));
                secureLog.setReceivedFrom(jsonObject.getString("received_from"));
                secureLog.setSyslogPid(jsonObject.getString("syslog_pid"));
                secureLog.setReceivedAt(jsonObject.getString("received_at"));
                secureLog.setSyslogMessage(jsonObject.getString("syslog_message"));
                secureLog.setType(jsonObject.getString("type"));
                secureLog.setSysLogTimestampLong(DateUtils.dateStrToLong(secureLog.getSyslogTimestamp()));
                return secureLog;
            }
        });
        // secureLogStream.print();

        // Assign timestamps and periodic watermarks
        DataStream<SecureLog> secureLogDataStreamWithWaterMask = secureLogStream.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<SecureLog>() {
            private long currentTimeStamp = 0L;
            // maximum allowed out-of-orderness
            private final long maxDelayTime = 2000L;

            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                return new Watermark(currentTimeStamp - maxDelayTime);
            }

            @Override
            public long extractTimestamp(SecureLog secureLog, long previousElementTimestamp) {
                long eventTime = secureLog.getSysLogTimestampLong();
                // keep the watermark clock moving forward monotonically
                currentTimeStamp = Math.max(eventTime, currentTimeStamp);
                return eventTime;
            }
        });
        // secureLogDataStreamWithWaterMask.print();

        // Sink to MySQL
        // secureLogDataStreamWithWaterMask.addSink(new MySqlSink());

        // Sink to Elasticsearch
        List<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost("192.168.126.128", 9200, "http"));
        httpHosts.add(new HttpHost("192.168.126.129", 9200, "http"));
        httpHosts.add(new HttpHost("192.168.126.130", 9200, "http"));

        // use an ElasticsearchSink.Builder to create an ElasticsearchSink
        ElasticsearchSink.Builder<SecureLog> esSinkBuilder = new ElasticsearchSink.Builder<SecureLog>(
                httpHosts,
                new ElasticsearchSinkFunction<SecureLog>() {
                    public IndexRequest createIndexRequest(SecureLog secureLog) {
                        // Map<String, SecureLog> json = new HashMap<>();
                        // json.put("data", secureLog);
                        Map<String, Object> map = CommonUtils.beanToMap(secureLog);
                        return Requests.indexRequest()
                                .index("flink_secure_log")
                                .type("_doc")
                                .source(map);
                    }

                    @Override
                    public void process(SecureLog secureLog, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                        requestIndexer.add(createIndexRequest(secureLog));
                    }
                }
        );
        // Flush after every single element so records show up in ES immediately; raise this for throughput
        esSinkBuilder.setBulkFlushMaxActions(1);

        // provide a RestClientFactory for custom configuration on the internally created REST client
        Header[] defaultHeaders = new Header[]{new BasicHeader("Authorization", HttpUtils.getHeader())};
        esSinkBuilder.setRestClientFactory(
                restClientBuilder -> {
                    restClientBuilder.setDefaultHeaders(defaultHeaders);
                }
        );

        secureLogDataStreamWithWaterMask.print();
        secureLogDataStreamWithWaterMask.addSink(esSinkBuilder.build());
        env.execute("Flink add data source");
    }
}
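The job references several helper classes from the original project (SecureLog, CommonUtils, DateUtils, PropertyUtils) that are not reproduced in this post. As a rough, hypothetical sketch only, CommonUtils.beanToMap could be implemented with fastjson (already on the classpath); the real implementation may differ:

package com.felix.utils;

import com.alibaba.fastjson.JSON;

import java.util.Map;

public class CommonUtils {

    // Convert a POJO into a Map so it can be passed to IndexRequest.source(map).
    // fastjson's JSONObject implements Map<String, Object>, so a JSON round trip is enough.
    public static Map<String, Object> beanToMap(Object bean) {
        return JSON.parseObject(JSON.toJSONString(bean));
    }
}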
3. If Elasticsearch is secured with a username and password, an Authorization header must be added to the HTTP requests sent to it.
package com.felix.utils;

import org.apache.commons.codec.binary.Base64;

import java.nio.charset.StandardCharsets;

public class HttpUtils {
    private static final String APP_KEY = "elastic";   // ES username
    private static final String SECRET_KEY = "123456"; // ES password

    /**
     * Build the value of the HTTP Basic Auth header.
     *
     * @return "Basic " followed by base64(username:password)
     */
    public static String getHeader() {
        String auth = APP_KEY + ":" + SECRET_KEY;
        byte[] encodedAuth = Base64.encodeBase64(auth.getBytes(StandardCharsets.US_ASCII));
        return "Basic " + new String(encodedAuth, StandardCharsets.US_ASCII);
    }
}
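An alternative to hand-building the Authorization header is to let the REST client attach the credentials itself through a credentials provider. The sketch below (the class and method names are mine, not part of the original project) shows how that could be wired into the ElasticsearchSink.Builder; the provider is created inside the callback so the factory lambda only captures serializable strings:

package com.felix.utils;

import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.impl.client.BasicCredentialsProvider;

public class EsAuthConfig {

    // Configure the sink's internal REST client to send Basic Auth credentials on every request.
    public static <T> void applyBasicAuth(ElasticsearchSink.Builder<T> esSinkBuilder,
                                          final String user, final String password) {
        esSinkBuilder.setRestClientFactory(restClientBuilder ->
                restClientBuilder.setHttpClientConfigCallback(httpClientBuilder -> {
                    // Built on the task manager when the client is created, so nothing
                    // non-serializable is captured by the factory lambda.
                    BasicCredentialsProvider provider = new BasicCredentialsProvider();
                    provider.setCredentials(AuthScope.ANY,
                            new UsernamePasswordCredentials(user, password));
                    return httpClientBuilder.setDefaultCredentialsProvider(provider);
                }));
    }
}

With this in place, the setRestClientFactory call in the job could be replaced by EsAuthConfig.applyBasicAuth(esSinkBuilder, "elastic", "123456"), and HttpUtils would no longer be needed.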