UserTransmitApp
A Flink job that consumes user messages from Kafka (kafka_input), counts each user's forwards per day in an event-time tumbling window, and writes the daily totals to Elasticsearch (es_output).
UserTransmitApp
package com.flink.app;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.flink.bean.UserTransmit;
import com.flink.sink.ToES;
import com.kafka.kafka_api.KafkaUtils;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.Date;
// Compute each user's daily forward count
public class UserTransmitApp {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); // explicit in Flink 1.11; event time became the default in 1.12
env.setParallelism(3);
SingleOutputStreamOperator<UserTransmit> userDS = env.addSource(KafkaUtils.getFlinkKafkaConsumer("user_message"))
        .map(new MapFunction<String, UserTransmit>() {
            @Override
            public UserTransmit map(String s) throws Exception {
                System.out.println(s); // debug: log the raw Kafka record
                JSONObject jsonObject = JSON.parseObject(s);
                return new UserTransmit(jsonObject.getString("username"),
                        jsonObject.getLongValue("transmit_time"));
            }
        });
// Assign event-time timestamps and watermarks, tolerating 10 s of out-of-orderness
SingleOutputStreamOperator<UserTransmit> user_water_ds = userDS.assignTimestampsAndWatermarks(
        WatermarkStrategy.<UserTransmit>forBoundedOutOfOrderness(Duration.ofSeconds(10))
                // transmit_time is in epoch seconds; Flink expects milliseconds
                .withTimestampAssigner((data, ts) -> data.getTransmit_time() * 1000L)
);
SingleOutputStreamOperator<Tuple2<String, Long>> tuple_user_ds = user_water_ds.map(new MapFunction<UserTransmit, Tuple2<String, Long>>() {
@Override
public Tuple2<String, Long> map(UserTransmit userTransmit) throws Exception {
String username = userTransmit.getUsername();
return Tuple2.of(username, 1L);
}
});
SingleOutputStreamOperator<JSONObject> result = tuple_user_ds
        .keyBy(new KeySelector<Tuple2<String, Long>, String>() {
            @Override
            public String getKey(Tuple2<String, Long> o) throws Exception {
                return o.f0;
            }
        })
        // One-day tumbling event-time windows; the -8h offset aligns window
        // boundaries to midnight in UTC+8 rather than midnight UTC
        .window(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)))
        .sum(1)
        .map(new MapFunction<Tuple2<String, Long>, JSONObject>() {
            @Override
            public JSONObject map(Tuple2<String, Long> in) throws Exception {
                // Note: this records the wall-clock time at which the window fires,
                // not the day the window covers
                SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                String dateTime = df.format(new Date());
                JSONObject jO = new JSONObject();
                jO.put("username", in.f0);
                jO.put("dayCountTransmit", in.f1);
                jO.put("day", dateTime);
                return jO;
            }
        })
        // JSONObject implements Map, so give Flink an explicit type hint
        .returns(TypeInformation.of(new TypeHint<JSONObject>() {}));
result.print("user forward count: ");
result.addSink(ToES.getESSink().build());
//resultDS.addSink(new HdfsUtils<User>().getTBucketingSink("hdfs://192.168.1.162:8020/UserData/"));
try {
env.execute();
} catch (Exception e) {
e.printStackTrace();
}
}
}
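
The KafkaUtils helper imported above (com.kafka.kafka_api.KafkaUtils) is not shown in this post. A minimal sketch of what it needs to provide, assuming plain-string records, a hypothetical broker address, and a hypothetical consumer group:

package com.kafka.kafka_api;

import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;

public class KafkaUtils {
    // Hypothetical broker address; replace with your own cluster
    private static final String BOOTSTRAP_SERVERS = "192.168.1.162:9092";

    public static FlinkKafkaConsumer011<String> getFlinkKafkaConsumer(String topic) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", BOOTSTRAP_SERVERS);
        props.setProperty("group.id", "user_transmit_group"); // hypothetical group id
        // FlinkKafkaConsumer011 matches the flink-connector-kafka-0.11 dependency in the pom below
        return new FlinkKafkaConsumer011<>(topic, new SimpleStringSchema(), props);
    }
}

To smoke-test the job without a Kafka cluster, a handful of JSON strings passed to env.fromElements(...) can stand in for the source.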
ToES
package com.flink.sink;
import com.alibaba.fastjson.JSONObject;
import com.flink.bean.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import java.util.*;
public class ToES {
public static ElasticsearchSink.Builder<JSONObject> getESSink() {
List<HttpHost> httpHosts = new ArrayList<>();
httpHosts.add(new HttpHost("192.168.1.162", 9200));
httpHosts.add(new HttpHost("192.168.1.163", 9200));
httpHosts.add(new HttpHost("192.168.1.164", 9200));
// The second Builder argument: the sink function that turns each record into an index request
MyElasticSearchSinkSFunction myElasticSearchSinkSFunction = new MyElasticSearchSinkSFunction();
ElasticsearchSink.Builder<JSONObject> esBuilder = new ElasticsearchSink.Builder<>(httpHosts, myElasticSearchSinkSFunction);
// Set the bulk flush threshold: flush after every single record.
// TODO: do not use 1 in production, it hurts throughput; it is set here only so
// results from the unbounded stream show up in ES immediately.
esBuilder.setBulkFlushMaxActions(1);
return esBuilder;
}
public static class MyElasticSearchSinkSFunction implements ElasticsearchSinkFunction<JSONObject> {
@Override
public void process(JSONObject element, RuntimeContext ctx, RequestIndexer indexer) {
    Map<String, String> sourceMap = new HashMap<>();
    sourceMap.put("username", element.getString("username"));
    sourceMap.put("dayCountTransmit", element.getString("dayCountTransmit"));
    sourceMap.put("day", element.getString("day"));
    // Build an IndexRequest against the user_message index
    IndexRequest indexRequest = Requests.indexRequest("user_message").type("read11").source(sourceMap);
    // Hand it to the indexer for bulk processing
    indexer.add(indexRequest);
}
}
}
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
ArrayList<HttpHost> httpHosts = new ArrayList<>();
httpHosts.add(new HttpHost("hadoop162", 9200));
httpHosts.add(new HttpHost("hadoop163", 9200));
httpHosts.add(new HttpHost("hadoop164", 9200));
DataStreamSource<String> ds = env.readTextFile("D:\\code\\flink111\\src\\main\\resources\\input\\water_sensor.txt");
// Parse into WaterSensor POJOs (id, ts, vc); note that the demo sink below
// consumes the raw string stream, so dsWS is built only for illustration
SingleOutputStreamOperator<WaterSensor> dsWS = ds.map(new MapFunction<String, WaterSensor>() {
    @Override
    public WaterSensor map(String in) throws Exception {
        String[] line = in.split(",");
        return new WaterSensor(line[0], Long.valueOf(line[1]), Integer.valueOf(line[2]));
    }
});
ElasticsearchSink<String> esSink = new ElasticsearchSink.Builder<String>(
httpHosts,
new ElasticsearchSinkFunction<String>() {
@Override
public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
// Put the record into a Map
HashMap<String, String> dataMap = new HashMap<>();
dataMap.put("data", element);
// Specify the index, type, and source
IndexRequest indexRequest = Requests.indexRequest("water_sensor").type("reading").source(dataMap);
indexer.add(indexRequest);
}
}
).build();
ds.addSink(esSink);
try {
env.execute();
} catch (Exception e) {
e.printStackTrace();
}
}
}
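
The TODO in getESSink() flags setBulkFlushMaxActions(1) as a demo-only setting. In production the same Builder can batch and retry; a sketch with illustrative, untuned values:

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkBase;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;

public class ToESProdConfig {
    public static ElasticsearchSink.Builder<JSONObject> tune(ElasticsearchSink.Builder<JSONObject> esBuilder) {
        // Flush when 1000 actions or 5 MB accumulate, or after 1 s at the latest
        esBuilder.setBulkFlushMaxActions(1000);
        esBuilder.setBulkFlushMaxSizeMb(5);
        esBuilder.setBulkFlushInterval(1000);
        // Retry failed bulk requests with exponential backoff
        esBuilder.setBulkFlushBackoff(true);
        esBuilder.setBulkFlushBackoffType(ElasticsearchSinkBase.FlushBackoffType.EXPONENTIAL);
        esBuilder.setBulkFlushBackoffRetries(3);
        esBuilder.setBulkFlushBackoffDelay(1000);
        return esBuilder;
    }
}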
UserTransmit
package com.flink.bean;
public class UserTransmit {
private String username;
private long transmit_time;
@Override
public String toString() {
    return "UserTransmit{" +
            "username='" + username + '\'' +
            ", transmit_time=" + transmit_time +
            '}';
}
public UserTransmit() {
}
public UserTransmit(String username, long transmit_time) {
this.username = username;
this.transmit_time = transmit_time;
}
public String getUsername() {
return username;
}
public void setUsername(String username) {
this.username = username;
}
public long getTransmit_time() {
return transmit_time;
}
public void setTransmit_time(long transmit_time) {
this.transmit_time = transmit_time;
}
}
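
For reference, the map function in UserTransmitApp expects Kafka records carrying exactly these two fields. A throwaway snippet that prints one such record (values are made up):

import com.alibaba.fastjson.JSONObject;

public class SampleEvent {
    public static void main(String[] args) {
        JSONObject sample = new JSONObject();
        sample.put("username", "alice");          // hypothetical user
        sample.put("transmit_time", 1605225600L); // epoch seconds; the job multiplies by 1000
        // Prints e.g. {"transmit_time":1605225600,"username":"alice"}
        System.out.println(sample.toJSONString());
    }
}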
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.flink</groupId>
<artifactId>flink1.11</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<flink.version>1.11.2</flink.version>
<java.version>1.8</java.version>
<scala.binary.version>2.11</scala.binary.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<log4j.version>2.12.1</log4j.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/log4j/log4j -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-shaded-hadoop-2-uber -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-shaded-hadoop-2-uber</artifactId>
<version>2.7.5-7.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.postgresql/postgresql -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>42.2.5</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc_2.11</artifactId>
<version>${flink.version}</version>
<!-- <classifier>sources</classifier>-->
<!-- <type>java-source</type>-->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.bahir/flink-connector-redis -->
<dependency>
<groupId>org.apache.bahir</groupId>
<artifactId>flink-connector-redis_${scala.binary.version}</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch6_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-filesystem_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.4</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>