一 ,开窗函数 :
1 ,共几行 :
sql :
val sql = "select sid,sname,count(1) over() cnt from student"
val df: DataFrame = spark. sql ( sql)
df. show ( 100 )
结果 :
+
| sid| sname| cnt|
+
| 1 | 孙福龙| 8 |
| 2 | 王婷婷| 8 |
| 3 | 王丹丹| 8 |
| 4 | 孙尚文| 8 |
| 5 | 孙静雅| 8 |
| 6 | 王珞丹| 8 |
| 7 | 张三| 8 |
| 8 | 李四| 8 |
+
2 ,分数表 : 二次排序 ( 科目,分数 )
sql :
select cid, sid, score
from score
order by cid asc , score desc
结果 :
+
| cid| sid| score|
+
| 1 | 3 | 91 |
| 1 | 2 | 79 |
| 1 | 7 | 66 |
| 1 | 1 | 56 |
| 1 | 8 | 49 |
| 1 | 5 | 46 |
| 1 | 6 | 35 |
| 2 | 4 | 88 |
| 2 | 7 | 88 |
| 2 | 2 | 81 |
| 2 | 1 | 78 |
| 2 | 6 | 68 |
| 2 | 3 | 47 |
| 2 | 8 | 11 |
| 3 | 2 | 92 |
| 3 | 4 | 90 |
| 3 | 3 | 88 |
| 3 | 5 | 78 |
| 3 | 7 | 77 |
| 3 | 1 | 67 |
| 3 | 8 | 22 |
| 4 | 7 | 98 |
| 4 | 4 | 93 |
| 4 | 6 | 71 |
| 4 | 2 | 68 |
| 4 | 1 | 58 |
| 4 | 3 | 56 |
| 4 | 5 | 53 |
| 4 | 8 | 38 |
+
3 ,分组,排序 :
sql :
val sql = "select row_number() over(partition by cid order by score desc) res,cid,sid,score from score"
val df: DataFrame = spark. sql ( sql)
df. show ( 100 )
结果 :
+ -- - + -- - + -- - + -- -- - +
| res| cid| sid| score|
+ -- - + -- - + -- - + -- -- - +
| 1 | 1 | 3 | 91 |
| 2 | 1 | 2 | 79 |
| 3 | 1 | 7 | 66 |
| 4 | 1 | 1 | 56 |
| 5 | 1 | 8 | 49 |
| 6 | 1 | 5 | 46 |
| 7 | 1 | 6 | 35 |
| 1 | 3 | 2 | 92 |
| 2 | 3 | 4 | 90 |
| 3 | 3 | 3 | 88 |
| 4 | 3 | 5 | 78 |
| 5 | 3 | 7 | 77 |
| 6 | 3 | 1 | 67 |
| 7 | 3 | 8 | 22 |
| 1 | 4 | 7 | 98 |
| 2 | 4 | 4 | 93 |
| 3 | 4 | 6 | 71 |
| 4 | 4 | 2 | 68 |
| 5 | 4 | 1 | 58 |
| 6 | 4 | 3 | 56 |
| 7 | 4 | 5 | 53 |
| 8 | 4 | 8 | 38 |
| 1 | 2 | 4 | 88 |
| 2 | 2 | 7 | 88 |
| 3 | 2 | 2 | 81 |
| 4 | 2 | 1 | 78 |
| 5 | 2 | 6 | 68 |
| 6 | 2 | 3 | 47 |
| 7 | 2 | 8 | 11 |
+ -- - + -- - + -- - + -- -- - +
4 ,top3 :
sql :
val sql = "select * from
( select row_number ( ) over ( partition by cid order by score desc) num, cid, sid, score
from score) res
where res. num<= 3 "
val df: DataFrame = spark. sql ( sql)
df. show ( 100 )
结果 :
+
| num| cid| sid| score|
+
| 1 | 1 | 3 | 91 |
| 2 | 1 | 2 | 79 |
| 3 | 1 | 7 | 66 |
| 1 | 3 | 2 | 92 |
| 2 | 3 | 4 | 90 |
| 3 | 3 | 3 | 88 |
| 1 | 4 | 7 | 98 |
| 2 | 4 | 4 | 93 |
| 3 | 4 | 6 | 71 |
| 1 | 2 | 4 | 88 |
| 2 | 2 | 7 | 88 |
| 3 | 2 | 2 | 81 |
+
二 ,日期函数 :
1 ,将字符串转为日期类型 :
思路 : String - java.util.Date - long - java.sql.Date - 日期类型 核心代码 :
new Date ( format. parse ( arr ( 3 ) ) . getTime)
StructField ( "tbirth" , DateType, true ) ,
全部代码 :
val rddRowTeacher: RDD[ Row] = rddTeacher. map ( line = > {
valarr: Array[ String] = line. split ( "\t" )
Row ( arr ( 0 ) . toInt, arr ( 1 ) , arr ( 2 ) , new Date ( format. parse ( arr ( 3 ) ) . getTime) , arr ( 4 ) , arr ( 5 ) )
} )
val fieldsTeacher: Array[ StructField] = Array (
StructField ( "tid" , IntegerType, true ) ,
StructField ( "tname" , StringType, true ) ,
StructField ( "tsex" , StringType, true ) ,
StructField ( "tbirth" , DateType, true ) ,
StructField ( "tlevel" , StringType, true ) ,
StructField ( "tdepart" , StringType, true )
)
2 ,直接查询日期类型 :
sql
val sql = "select sbirth from student where sid=1"
结果 :
+
| sbirth|
+
| 1989 - 03 - 09 |
+