package cn.edu.tju.demo2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.*;
import org.apache.flink.types.Row;
/**
 * Example Flink Table API job: reads (userId, ts, val) rows from a local CSV
 * file, computes a per-user row count and average value, and upserts the
 * result into an Elasticsearch 7 index via the legacy descriptor API.
 */
public class TestEs {
    // Placeholder Elasticsearch host; replace with a real address before running.
    private static final String HOST = "xx.xx.xx.xx";
    // Target Elasticsearch index name.
    private static final String INDEX = "info";
    // Local CSV source file; each row is: userId,ts,val
    private static final String FILE_PATH = "info.txt";

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Single parallelism keeps the example's output ordering deterministic.
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Register the CSV file as source table "input".
        tableEnv.connect(new FileSystem().path(FILE_PATH))
                .withFormat(new Csv())
                .withSchema(new Schema()
                        .field("userId", DataTypes.STRING())
                        .field("ts", DataTypes.BIGINT())
                        .field("val", DataTypes.DOUBLE()))
                .createTemporaryTable("input");

        Table dataTable = tableEnv.from("input");

        // Per-user aggregation: row count ("total") and average of "val" ("avg").
        Table aggregateTable = dataTable
                .groupBy("userId")
                .select("userId, userId.count as total, val.avg as avg");

        // Register the Elasticsearch sink table "output".
        tableEnv.connect(new Elasticsearch()
                .version("7")
                .host(HOST, 9200, "http")
                .index(INDEX)
                .documentType("_doc"))
                // Upsert mode: ES supports upserts, while Kafka and file sinks
                // do not. The default append mode cannot absorb the updates a
                // grouped aggregation emits, so upsert mode is required here.
                .inUpsertMode()
                .withFormat(new Json())
                .withSchema(new Schema()
                        .field("userId", DataTypes.STRING())
                        .field("total", DataTypes.BIGINT())
                        .field("avg", DataTypes.DOUBLE()))
                .createTemporaryTable("output");

        aggregateTable.insertInto("output");
        tableEnv.execute("my job");
    }
}
Contents of info.txt:
user1,1680000890,31.6
user2,1681111900,38.3
user1,1680000890,34.9