1.导入需要的依赖
<dependencies>
    <!-- All Flink artifacts are kept on the same release (1.7.2) and the same
         Scala suffix (_2.11); mixing Flink module versions on one classpath
         can cause binary-incompatibility errors at runtime. -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-scala_2.11</artifactId>
        <version>1.7.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-scala_2.11</artifactId>
        <version>1.7.2</version>
    </dependency>
    <!-- Table API / SQL module; version aligned with the two modules above. -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table_2.11</artifactId>
        <version>1.7.2</version>
    </dependency>
</dependencies>
2.编写代码
2.1 基于流处理(DataStream)的撤回流(retract stream)示例
package org.zsw.flinktable
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{Table, TableEnvironment}
import org.apache.flink.types.Row
/**
 * Streaming exercise: registers a small DataStream of [[User]] records as the
 * SQL table `t_user`, builds several SQL queries against it, and prints the
 * per-subject total score as a retract stream.
 *
 * Only `userClazzSumScore` is converted to a stream and printed; the other
 * queries are built but not emitted.
 *
 * author: zsw
 * date: Created in 2020/4/7 15:36
 */
object Flink_Table_execise01 {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Parallelism is fixed to 1 (presumably so the printed records come from a single task).
    env.setParallelism(1)
    val tableEnv = TableEnvironment.getTableEnvironment(env)

    val dataStream: DataStream[User] = env.fromElements(
      User("xiaoming", "english", 90),
      User("xiaoming", "math", 80),
      User("xiaohong", "math", 98),
      User("xiaohong", "english", 82)
    )

    // Register the stream under the table name used by the SQL statements below.
    tableEnv.registerDataStream("t_user", dataStream)

    val allUsers: Table = tableEnv.sqlQuery("select * from t_user")

    // 5. Total score per subject.
    val userClazzSumScore: Table =
      tableEnv.sqlQuery("select clazz,sum(score) as sumScore from t_user group by clazz")

    // 6. Average score per student.
    // The result column is labelled avgScore; it was previously mislabelled sumScore.
    val userAvgScore: Table =
      tableEnv.sqlQuery("select name,avg(score) as avgScore from t_user group by name")

    // 7. Student(s) with the highest english score.
    // Author's note: the retract stream does not support ORDER BY, so the
    // following ORDER BY ... LIMIT form is replaced by a self-join:
    //   select name,clazz,score from t_user where clazz = 'english' order by score desc limit 1
    // The WHERE on u2.maxScore drops rows without a match, so the LEFT JOIN
    // effectively behaves like an inner join here.
    val maxEnglish: Table = tableEnv.sqlQuery(
      "select * from t_user u1 left join (select clazz,max(score) as maxScore from t_user group by clazz having clazz = 'english') u2 on u1.clazz = u2.clazz where u1.score = u2.maxScore")

    // 8. Total score per student.
    val userNameSumScore: Table =
      tableEnv.sqlQuery("select name,sum(score) as sumScore from t_user group by name")

    // Each emitted element is (flag, row): true marks an added result row,
    // false marks the retraction of a previously emitted row.
    val tableValue: DataStream[(Boolean, Row)] = tableEnv.toRetractStream[Row](userClazzSumScore)
    tableValue.print()

    env.execute()
  }
}
/**
 * One score record: a student's score in a single subject.
 *
 * @param name  student name
 * @param clazz subject name (the sample data uses "english" and "math")
 * @param score score obtained in that subject
 */
case class User(
  name: String,
  clazz: String,
  score: Int
)
输出结果
(true,english,90)
(true,math,80)
(false,math,80)
(true,math,178)
(false,english,90)
(true,english,172)
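解释
前两条数据的 key(clazz)不同,所以它们第一次进入时都输出 true,表示新增一条结果;当相同 key 的数据再次进入时,会先输出 false,撤回(删除)该 key 已有的旧结果,再输出 true,写入聚合后的新结果。例如 math 先输出 (true,math,80),随后输出 (false,math,80) 和 (true,math,178)。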
2.2 基于批处理(DataSet)的示例:结果一次性输出,没有撤回消息
package org.zsw.flinktable
import org.apache.flink.api.scala._
import org.apache.flink.table.api.{Table, TableEnvironment}
import org.apache.flink.types.Row
/**
 * Batch exercise: registers a small DataSet of [[User]] records as the SQL
 * table `t_user`, builds several SQL queries against it, and prints the total
 * score per student.
 *
 * Only `userNameSumScore` is converted to a DataSet and printed; the other
 * queries are built but not emitted.
 *
 * author: zsw
 * date: Created in 2020/4/7 17:37
 */
object Flink_Table_execise02 {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val tableEnv = TableEnvironment.getTableEnvironment(env)

    val dataSet: DataSet[User] = env.fromElements(
      User("xiaoming", "english", 90),
      User("xiaoming", "math", 80),
      User("xiaohong", "math", 98),
      User("xiaohong", "english", 82)
    )

    // Register the data set under the table name used by the SQL statements below.
    tableEnv.registerDataSet("t_user", dataSet)

    // 5. Total score per subject.
    val userClazzSumScore: Table =
      tableEnv.sqlQuery("select clazz,sum(score) as sumScore from t_user group by clazz")

    // 6. Average score per student.
    // The result column is labelled avgScore; it was previously mislabelled sumScore.
    val userAvgScore: Table =
      tableEnv.sqlQuery("select name,avg(score) as avgScore from t_user group by name")

    // 7. Student with the highest english score (ORDER BY ... LIMIT is used directly here).
    val maxEnglish: Table =
      tableEnv.sqlQuery("select * from t_user where clazz = 'english' order by score desc limit 1")

    // 8. Total score per student.
    val userNameSumScore: Table =
      tableEnv.sqlQuery("select name,sum(score) as sumScore from t_user group by name")

    // No env.execute() follows: this print() call is the only output action in the program.
    tableEnv.toDataSet[Row](userNameSumScore).print()
  }
}
2121

被折叠的 条评论
为什么被折叠?



