BatchTableEnvironment 用于批处理场景。下面通过具体代码实践,演示其 SQL 相关的用法:
一、准备代码运行所需的文本数据。本文代码主要读取 CSV 文本内容,涉及 student.csv 和 teacher.csv 两个文件,其具体内容分别如下:
学生姓名 | 性别 | 学科 | 分数 | 时间戳 | 上报时间 |
张三 | 男 | 语文 | 90.5 | 1577851151000 | 2020/1/1 11:59 |
张三 | 男 | 数学 | 100 | 1577851263000 | 2020/1/1 12:01 |
张三 | 男 | 外语 | 80 | 1577851116000 | 2020/1/1 11:58 |
李四 | 女 | 语文 | 68 | 1577851138000 | 2020/1/1 11:58 |
王二 | 女 | 外语 | 99 | 1577851400000 | 2020/1/1 12:03 |
张三 | 男 | 外语 | 80 | 1583631500000 | 2020/3/8 9:38 |
李四 | 女 | 语文 | 68 | 1584940078000 | 2020/3/23 13:07 |
班主任 | 所教学生姓名 |
教师一 | 张三 |
教师一 | 李四 |
教师二 | 王二 |
二、Java 代码所需的实体类定义如下:
import java.sql.Timestamp;
/**
 * Entity mapped from one row of student.csv.
 *
 * <p>Written without Lombok: the snippet above imports only
 * {@code java.sql.Timestamp}, so the original {@code @Data} annotation would not
 * compile; explicit accessors (the same ones Lombok would generate) are provided
 * instead, plus a Lombok-style {@code toString()} so {@code DataSet.print()}
 * output stays readable.
 */
public class StudentInfo {
    private String name;    // student name (学生姓名)
    private String sex;     // gender (性别)
    private String course;  // subject (学科)
    private Float score;    // score for the subject (分数)
    // "timestamp" is a SQL keyword, so this field cannot be referenced directly
    // in the SQL queries; they use sysDate instead.
    private Long timestamp; // epoch milliseconds (fed to new Timestamp(long) below)
    // Populated from `timestamp` before the table is registered: the time_attr
    // argument of a group-window function must be a TIMESTAMP attribute.
    private Timestamp sysDate;

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public String getSex() { return sex; }
    public void setSex(String sex) { this.sex = sex; }

    public String getCourse() { return course; }
    public void setCourse(String course) { this.course = course; }

    public Float getScore() { return score; }
    public void setScore(Float score) { this.score = score; }

    public Long getTimestamp() { return timestamp; }
    public void setTimestamp(Long timestamp) { this.timestamp = timestamp; }

    public Timestamp getSysDate() { return sysDate; }
    public void setSysDate(Timestamp sysDate) { this.sysDate = sysDate; }

    /** Lombok-@Data-style rendering, e.g. {@code StudentInfo(name=张三, ...)}. */
    @Override
    public String toString() {
        return "StudentInfo(name=" + name + ", sex=" + sex + ", course=" + course
                + ", score=" + score + ", timestamp=" + timestamp
                + ", sysDate=" + sysDate + ")";
    }
}
import lombok.Data;
/**
 * Entity mapped from one row of teacher.csv
 * (班主任 -> teacherName, 所教学生姓名 -> studentName).
 *
 * <p>Explicit accessors instead of Lombok {@code @Data}, so the class compiles
 * without annotation processing; the example code only relies on the
 * getters/setters and on {@code toString()} for {@code DataSet.print()}.
 */
public class Teacher {
    private String teacherName; // homeroom teacher name
    private String studentName; // name of a student taught by this teacher

    public String getTeacherName() { return teacherName; }
    public void setTeacherName(String teacherName) { this.teacherName = teacherName; }

    public String getStudentName() { return studentName; }
    public void setStudentName(String studentName) { this.studentName = studentName; }

    /** Lombok-@Data-style rendering, e.g. {@code Teacher(teacherName=教师一, ...)}. */
    @Override
    public String toString() {
        return "Teacher(teacherName=" + teacherName + ", studentName=" + studentName + ")";
    }
}
三、以下代码演示若干具体功能(窗口聚合与关联查询),完整示例如下:
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.BatchTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import java.sql.Timestamp;
/**
 * Batch Table/SQL demo: loads the two CSV files into DataSets, registers them
 * as tables, then runs a 1-day tumbling-window aggregation over the student
 * scores and a LEFT JOIN between students and their homeroom teachers,
 * printing each intermediate result.
 */
public class FlinkTableSQLBatchExample {

    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
        final BatchTableEnvironment tableEnv = BatchTableEnvironment.create(batchEnv);

        // Source: parse student.csv into StudentInfo POJOs, skipping the header line.
        // NOTE(review): only 5 of the 6 columns shown above are mapped — the
        // 上报时间 column is ignored; confirm this matches the real file layout.
        DataSet<StudentInfo> rawStudents = batchEnv
                .readCsvFile("/Users/springk/Documents/student.csv")
                .ignoreFirstLine()
                .pojoType(StudentInfo.class, "name", "sex", "course", "score", "timestamp");

        // Derive sysDate from the epoch-millis column: the time_attr argument of
        // a group-window function must be a TIMESTAMP attribute.
        DataSet<StudentInfo> students = rawStudents
                .flatMap(new FlatMapFunction<StudentInfo, StudentInfo>() {
                    @Override
                    public void flatMap(StudentInfo student, Collector<StudentInfo> out) throws Exception {
                        student.setSysDate(new Timestamp(student.getTimestamp()));
                        out.collect(student);
                    }
                });
        students.print();

        // Source: parse teacher.csv into Teacher POJOs, skipping the header line.
        DataSet<Teacher> teachers = batchEnv
                .readCsvFile("/Users/springk/Documents/teacher.csv")
                .ignoreFirstLine()
                .pojoType(Teacher.class, "teacherName", "studentName");
        teachers.print();

        // Convert the student DataSet to a Table and register it as "studentInfo".
        Table studentTable = tableEnv.fromDataSet(students);
        tableEnv.registerTable("studentInfo", studentTable);

        // GroupBy window aggregation: per student name, sum the scores inside
        // each 1-day tumbling window.
        Table dailyScores = tableEnv.sqlQuery(
                "SELECT TUMBLE_START(sysDate, INTERVAL '1' DAY) as wStart, "
                        + "name, SUM(score) as score FROM studentInfo "
                        + "GROUP BY TUMBLE(sysDate, INTERVAL '1' DAY), name");
        tableEnv.toDataSet(dailyScores, Row.class).print();

        // Register the teacher DataSet and left-join students against it.
        tableEnv.registerDataSet("teacher", teachers);
        Table joined = tableEnv.sqlQuery(
                "SELECT * FROM studentInfo LEFT JOIN teacher "
                        + "ON studentInfo.name = teacher.studentName");
        tableEnv.toDataSet(joined, Row.class).print();
    }
}
四、运行结果信息: