Flink1.10-SQL(kafka&PG2ES)
input
// Dimension table data in PostgreSQL: deptdim (columns: username, deptno, deptname, location)
deptdim
u02 2 d02 l02
u01 1 d01 l01
u03 1 d01 l01
// Main (fact) table data from Kafka: topic pageLog, each record is "<username> <transmit_time>"
pageLog
>u01 1234
>u02 2333
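The two pageLog records above can be sent with the project's own KafkaUtils producer. A minimal sketch, assuming a small helper class (PageLogProducerDemo is illustrative and not part of the original code):

package com.sql.app;
import com.sql.utils.kafka_api.KafkaUtils;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
public class PageLogProducerDemo {
    public static void main(String[] args) {
        // Each record is "<username> <transmit_time in seconds>", matching the split(" ") parsing in Kafka5Pg2ES
        Producer<String, String> producer = KafkaUtils.getKafkaProducer();
        producer.send(new ProducerRecord<>("pageLog", "u01 1234"));
        producer.send(new ProducerRecord<>("pageLog", "u02 2333"));
        producer.flush();
        producer.close();
    }
}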
output
// ES: search hits from the user_transmit index
{
"_index" : "user_transmit",
"_type" : "read11",
"_id" : "xUgcMXgBsBclsWGUZSyg",
"_score" : 1.0,
"_source" : {
"location" : "l02",
"username" : "u02",
"deptname" : "d02",
"transmit_count" : 1
}
},
{
"_index" : "user_transmit",
"_type" : "read11",
"_id" : "izwaMXgBu-J4ALZ21oGG",
"_score" : 1.0,
"_source" : {
"location" : "l01",
"username" : "u01",
"deptname" : "d01",
"transmit_count" : 1
}
}
Kafka5Pg2ES
package com.sql.app;
import com.sql.bean.UserTransmit;
import com.sql.utils.es.User_Dept_ESSink;
import com.sql.utils.kafka_api.KafkaUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
public class Kafka5Pg2ES {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
DataStreamSource<String> kf_ds = env.addSource(KafkaUtils.getFlinkKafkaConsumer("pageLog"));
// kf_ds.print();
EnvironmentSettings mySetting = EnvironmentSettings
.newInstance()
// .useOldPlanner()
.useBlinkPlanner()
.inStreamingMode()
.build();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, mySetting);
SingleOutputStreamOperator<UserTransmit> user_ds = kf_ds.map(new MapFunction<String, UserTransmit>() {
@Override
public UserTransmit map(String s) throws Exception {
String[] split = s.split(" ");
return new UserTransmit(split[0], Long.parseLong(split[1]));
}
}).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<UserTransmit>(Time.seconds(2)) {
@Override
public long extractTimestamp(UserTransmit element) {
return element.getTransmit_time() * 1000L;
}
});
tableEnv.createTemporaryView("UserLog", user_ds);
//Table result = tableEnv.sqlQuery("select * from UserLog limit 10");
//tableEnv.toRetractStream(result, Row.class).print("sql");
// Lengths such as VARCHAR(10) in the CREATE statement are not reliably enforced; DOUBLE() always errors and DECIMAL(5,2) errors too. In short: use DECIMAL in the database and double in Java.
// Types must be consistent with what Flink handles internally, not with the database types, e.g. the database column is TIMESTAMP(6) but this job works with TIMESTAMP(3) internally.
// TIMESTAMP(3) DOUBLE
String dimDDL =
"CREATE TABLE DeptDim ( "
+ " username VARCHAR "
+ " ,deptno VARCHAR "
+ " ,deptname VARCHAR "
+ " ,location VARCHAR "
+ ") WITH ( "
+ " 'connector.type' = 'jdbc', "
+ " 'connector.url' = 'jdbc:postgresql://hadoop163:5432/test_db2', "
+ " 'connector.table' = 'deptdim', "
+ " 'connector.username' = 'test_user', "
+ " 'connector.password' = 'aaaaaa', "
+ " 'connector.write.flush.max-rows' = '1' "
+ ")";
tableEnv.sqlUpdate(dimDDL);
// Table result = tableEnv.sqlQuery("select * from DeptDim limit 10");
// tableEnv.toRetractStream(result, Row.class).print("sql");
/**
* Main (fact) table: UserLog
* Dimension table: DeptDim
*/
// FOR SYSTEM_TIME AS OF PROCTIME()
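// A lookup (temporal table) join queries the dimension table per record instead of once.
// Minimal sketch, assuming UserLog were registered with a processing-time attribute,
// e.g. tableEnv.createTemporaryView("UserLog", user_ds, "username, transmit_time, proctime.proctime"):
//   SELECT u.username, d.deptname, d.location
//   FROM UserLog AS u
//   JOIN DeptDim FOR SYSTEM_TIME AS OF u.proctime AS d
//   ON u.username = d.username
// The regular LEFT JOIN below scans DeptDim once at job start, so later changes to the
// PostgreSQL table are not reflected.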
Table result = tableEnv.sqlQuery("SELECT \n" +
"u.username,d.deptname,d.location,count(*) AS transmit_count\n" +
"FROM UserLog AS u\n" +
"LEFT JOIN DeptDim AS d \n" +
"ON u.username = d.username\n" +
"group by u.username,d.deptname,d.location");
//tableEnv.toRetractStream(result, Row.class).print("sql");
DataStream<Tuple2<Boolean, Row>> tableResult = tableEnv.toRetractStream(result, Row.class);
ElasticsearchSink.Builder<Tuple2<Boolean, Row>> esSink = User_Dept_ESSink.getESSink();
tableResult.print();
//print result
//(true,u01,d01,l01,1)
//(true,u02,d02,l02,1)
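// Note (an observation, not from the original code): toRetractStream also emits
// retraction records (f0 == false) whenever an aggregated count is updated, and the
// sink below indexes those as new documents too. One possible way to keep only
// accumulate messages would be:
//   tableResult.filter(t -> t.f0).addSink(esSink.build());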
tableResult.addSink(esSink.build());
try {
env.execute();
} catch (Exception e) {
e.printStackTrace();
}
}
}
User_Dept_ESSink
package com.sql.utils.es;
import com.alibaba.fastjson.JSONObject;
import com.sql.bean.UserTransmit;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.flink.types.Row;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class User_Dept_ESSink {
public static ElasticsearchSink.Builder<Tuple2<Boolean, Row>> getESSink() {
List<HttpHost> httpHosts = new ArrayList<>();
httpHosts.add(new HttpHost("192.168.1.162", 9200));
httpHosts.add(new HttpHost("192.168.1.163", 9200));
httpHosts.add(new HttpHost("192.168.1.164", 9200));
// Second argument of the ElasticsearchSink.Builder: the sink function applied to each element
MyElasticSearchSinkSFunction myElasticSearchSinkSFunction = new MyElasticSearchSinkSFunction();
ElasticsearchSink.Builder<Tuple2<Boolean, Row>> esBuilder = new ElasticsearchSink.Builder<>(httpHosts, myElasticSearchSinkSFunction);
// Set the bulk capacity: flush after every single record
// TODO Do not set this to 1 in production, it hurts throughput; it is 1 here only so the unbounded stream's writes show up in ES immediately
esBuilder.setBulkFlushMaxActions(1);
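// For production, larger or time-based batching is more typical; an illustrative
// sketch (values are assumptions, not from the original code):
//   esBuilder.setBulkFlushMaxActions(1000);
//   esBuilder.setBulkFlushMaxSizeMb(5);
//   esBuilder.setBulkFlushInterval(5000L);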
return esBuilder;
}
public static class MyElasticSearchSinkSFunction implements ElasticsearchSinkFunction<Tuple2<Boolean, Row>> {
@Override
public void process(Tuple2<Boolean, Row> element, RuntimeContext ctx, RequestIndexer indexer) {
Map<String, Object> sourceMap = new HashMap<String, Object>();
//sourceMap.put("data", element.toString());
// SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
// String dateTime = df.format(new Date());
Row row = element.f1;
sourceMap.put("username", row.getField(0));
sourceMap.put("deptname", row.getField(1));
sourceMap.put("location",row.getField(2));
sourceMap.put("transmit_count",row.getField(3));
// Create an IndexRequest
IndexRequest indexRequest = Requests.indexRequest("user_transmit").type("read11").source(sourceMap);
// Add the request to the indexer
indexer.add(indexRequest);
}
}
public static void main(String[] args) {
}
}
KafkaUtils
package com.sql.utils.kafka_api;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import java.util.Properties;
public class KafkaUtils {
public static Producer<String, String> getKafkaProducer(){
Properties props = new Properties();
props.put("bootstrap.servers", "192.168.1.162:9092");//kafka集群,broker-list
props.put("acks", "all");
props.put("retries", 1);//批次大小
props.put("linger.ms", 1);//等待时重试次数
props.put("batch.size", 16384);//间
props.put("buffer.memory", 33554432);//RecordAccumulator缓冲区大小
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
Producer<String, String> producer = new KafkaProducer<>(props);
return producer;
}
public static FlinkKafkaConsumer011<String> getFlinkKafkaConsumer(String topicName){
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "192.168.1.162:9092");
properties.setProperty("zookeeper.connect", "192.168.1.162:2181");
properties.setProperty("group.id", "KakaUtils002");
properties.setProperty("key.deserializer",
"org.apache.kafka.common.serialization.StringDeserializer");
properties.setProperty("value.deserializer",
"org.apache.kafka.common.serialization.StringDeserializer");
properties.setProperty("auto.offset.reset", "latest");
// topicName is e.g. "pageLog"
FlinkKafkaConsumer011<String> kafkaSource = new FlinkKafkaConsumer011<>(topicName, new SimpleStringSchema(), properties);
return kafkaSource;
}
}
UserTransmit
package com.sql.bean;
public class UserTransmit {
private String username;
private long transmit_time;
@Override
public String toString() {
return "User{" +
"username='" + username + '\'' +
", transmit_time='" + transmit_time + '\'' +
'}';
}
public UserTransmit() {
}
public UserTransmit(String username, long transmit_time) {
this.username = username;
this.transmit_time = transmit_time;
}
public String getUsername() {
return username;
}
public void setUsername(String username) {
this.username = username;
}
public long getTransmit_time() {
return transmit_time;
}
public void setTransmit_time(long transmit_time) {
this.transmit_time = transmit_time;
}
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.demo</groupId>
<artifactId>flink1.10.0</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_2.12</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.bahir</groupId>
<artifactId>flink-connector-redis_2.11</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch6_2.12</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.44</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-statebackend-rocksdb_2.12</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.12</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_2.12</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-csv</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-jdbc_2.11</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>42.2.5</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.4</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.1.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<artifactSet>
<excludes>
<exclude>com.google.code.findbugs:jsr305</exclude>
<exclude>org.slf4j:*</exclude>
<exclude>log4j:*</exclude>
</excludes>
</artifactSet>
<filters>
<filter>
<!-- Do not copy the signatures in the META-INF folder.
Otherwise, this might cause SecurityExceptions when using the JAR. -->
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.sql.app.Kafka5Pg2ES</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>