spark sql常用方法

常用方法

show

scala> emp.show()
+----+------+-----+------+----------+---------+----+------+
|comm|deptno|empno| ename|  hiredate|      job| mgr|   sal|
+----+------+-----+------+----------+---------+----+------+
|null|    20| 7369| SMITH|1980-12-17|    CLERK|7902| 800.0|
| 300|    30| 7499| ALLEN|1981-02-20| SALESMAN|7698|1600.5|
| 500|    30| 7521|  WARD|1981-02-22| SALESMAN|7698|1250.0|
|null|    20| 7566| JONES|1981-04-02|  MANAGER|7839|2975.0|
|1400|    30| 7654|MARTIN|1981-09-28| SALESMAN|7698|1250.0|
|null|    30| 7698| BLAKE|1981-05-01|  MANAGER|7839|2850.0|
|null|    10| 7782| CLARK|1981-06-09|  MANAGER|7839|2450.0|
|null|    20| 7788| SCOTT|1987-04-19|  ANALYST|7566|3000.0|
|null|    10| 7839|  KING|1981-11-17|PRESIDENT|null|5000.0|
|   0|    30| 7844|TURNER|1981-09-08| SALESMAN|7698|1500.0|
|null|    20| 7876| ADAMS|1987-05-23|    CLERK|7788|1100.0|
|null|    30| 7900| JAMES|1981-12-03|    CLERK|7698| 950.0|
|null|    20| 7902|  FORD|1981-12-02|  ANALYST|7566|3000.0|
|null|    10| 7934|MILLER|1982-01-23|    CLERK|7369|1300.0|
+----+------+-----+------+----------+---------+----+------+


scala> emp.show(3)
+----+------+-----+-----+----------+--------+----+------+
|comm|deptno|empno|ename|  hiredate|     job| mgr|   sal|
+----+------+-----+-----+----------+--------+----+------+
|null|    20| 7369|SMITH|1980-12-17|   CLERK|7902| 800.0|
| 300|    30| 7499|ALLEN|1981-02-20|SALESMAN|7698|1600.5|
| 500|    30| 7521| WARD|1981-02-22|SALESMAN|7698|1250.0|
+----+------+-----+-----+----------+--------+----+------+
only showing top 3 rows

collect

scala> emp.collect
res8: Array[org.apache.spark.sql.Row] = Array([null,20,7369,SMITH,1980-12-17,CLERK,7902,800.0], [300,30,7499,ALLEN,1981-02-20,SALESMAN,7698,1600.5], [500,30,7521,WARD,1981-02-22,SALESMAN,7698,1250.0], [null,20,7566,JONES,1981-04-02,MANAGER,7839,2975.0], [1400,30,7654,MARTIN,1981-09-28,SALESMAN,7698,1250.0], [null,30,7698,BLAKE,1981-05-01,MANAGER,7839,2850.0], [null,10,7782,CLARK,1981-06-09,MANAGER,7839,2450.0], [null,20,7788,SCOTT,1987-04-19,ANALYST,7566,3000.0], [null,10,7839,KING,1981-11-17,PRESIDENT,null,5000.0], [0,30,7844,TURNER,1981-09-08,SALESMAN,7698,1500.0], [null,20,7876,ADAMS,1987-05-23,CLERK,7788,1100.0], [null,30,7900,JAMES,1981-12-03,CLERK,7698,950.0], [null,20,7902,FORD,1981-12-02,ANALYST,7566,3000.0], [null,10,7934,MILLER,1982-01-23,CLERK,7369,1300.0])

collectAsList

scala> emp.collectAsList
res9: java.util.List[org.apache.spark.sql.Row] = [[null,20,7369,SMITH,1980-12-17,CLERK,7902,800.0], [300,30,7499,ALLEN,1981-02-20,SALESMAN,7698,1600.5], [500,30,7521,WARD,1981-02-22,SALESMAN,7698,1250.0], [null,20,7566,JONES,1981-04-02,MANAGER,7839,2975.0], [1400,30,7654,MARTIN,1981-09-28,SALESMAN,7698,1250.0], [null,30,7698,BLAKE,1981-05-01,MANAGER,7839,2850.0], [null,10,7782,CLARK,1981-06-09,MANAGER,7839,2450.0], [null,20,7788,SCOTT,1987-04-19,ANALYST,7566,3000.0], [null,10,7839,KING,1981-11-17,PRESIDENT,null,5000.0], [0,30,7844,TURNER,1981-09-08,SALESMAN,7698,1500.0], [null,20,7876,ADAMS,1987-05-23,CLERK,7788,1100.0], [null,30,7900,JAMES,1981-12-03,CLERK,7698,950.0], [null,20,7902,FORD,1981-12-02,ANALYST,7566,3000.0], [null,10,7934,MILLER,1982-01-23,CLERK,7369,1300.0]]

first等

scala> emp.first
res10: org.apache.spark.sql.Row = [null,20,7369,SMITH,1980-12-17,CLERK,7902,800.0]

scala> emp.head
res11: org.apache.spark.sql.Row = [null,20,7369,SMITH,1980-12-17,CLERK,7902,800.0]

scala> emp.head(2)
res12: Array[org.apache.spark.sql.Row] = Array([null,20,7369,SMITH,1980-12-17,CLERK,7902,800.0], [300,30,7499,ALLEN,1981-02-20,SALESMAN,7698,1600.5])

scala> emp.take(2)
res13: Array[org.apache.spark.sql.Row] = Array([null,20,7369,SMITH,1980-12-17,CLERK,7902,800.0], [300,30,7499,ALLEN,1981-02-20,SALESMAN,7698,1600.5])

scala> emp.takeAsList(2)
res14: java.util.List[org.apache.spark.sql.Row] = [[null,20,7369,SMITH,1980-12-17,CLERK,7902,800.0], [300,30,7499,ALLEN,1981-02-20,SALESMAN,7698,1600.5]]

where

scala> emp.where("deptno=10").show
+----+------+-----+------+----------+---------+----+------+
|comm|deptno|empno| ename|  hiredate|      job| mgr|   sal|
+----+------+-----+------+----------+---------+----+------+
|null|    10| 7782| CLARK|1981-06-09|  MANAGER|7839|2450.0|
|null|    10| 7839|  KING|1981-11-17|PRESIDENT|null|5000.0|
|null|    10| 7934|MILLER|1982-01-23|    CLERK|7369|1300.0|
+----+------+-----+------+----------+---------+----+------+


scala> emp.where("deptno=10 and sal>1400").show
+----+------+-----+-----+----------+---------+----+------+
|comm|deptno|empno|ename|  hiredate|      job| mgr|   sal|
+----+------+-----+-----+----------+---------+----+------+
|null|    10| 7782|CLARK|1981-06-09|  MANAGER|7839|2450.0|
|null|    10| 7839| KING|1981-11-17|PRESIDENT|null|5000.0|
+----+------+-----+-----+----------+---------+----+------+

filter

scala> emp.filter("deptno=10 and sal>1400").show
+----+------+-----+-----+----------+---------+----+------+
|comm|deptno|empno|ename|  hiredate|      job| mgr|   sal|
+----+------+-----+-----+----------+---------+----+------+
|null|    10| 7782|CLARK|1981-06-09|  MANAGER|7839|2450.0|
|null|    10| 7839| KING|1981-11-17|PRESIDENT|null|5000.0|
+----+------+-----+-----+----------+---------+----+------+

select

scala> emp.select("empno","ename")show(3)
+-----+-----+
|empno|ename|
+-----+-----+
| 7369|SMITH|
| 7499|ALLEN|
| 7521| WARD|
+-----+-----+
only showing top 3 rows


scala> emp.select(emp("empno"),emp("sal")+1).show(3)
+-----+---------+
|empno|(sal + 1)|
+-----+---------+
| 7369|    801.0|
| 7499|   1601.5|
| 7521|   1251.0|
+-----+---------+
only showing top 3 rows


scala> emp.select(col("sal"),col("sal")+1).show(3)
+------+---------+
|   sal|(sal + 1)|
+------+---------+
| 800.0|    801.0|
|1600.5|   1601.5|
|1250.0|   1251.0|
+------+---------+
only showing top 3 rows

selectExpr

scala> emp.selectExpr("ename","empno as no","round(sal)").show(4)
+-----+----+-------------+
|ename|  no|round(sal, 0)|
+-----+----+-------------+
|SMITH|7369|        800.0|
|ALLEN|7499|       1601.0|
| WARD|7521|       1250.0|
|JONES|7566|       2975.0|
+-----+----+-------------+
only showing top 4 rows

col

scala> emp.select(col("ename")).show
+------+
| ename|
+------+
| SMITH|
| ALLEN|
|  WARD|
| JONES|
|MARTIN|
| BLAKE|
| CLARK|
| SCOTT|
|  KING|
|TURNER|
| ADAMS|
| JAMES|
|  FORD|
|MILLER|
+------+

drop

scala> emp.drop("empno").show(3)
+----+------+-----+----------+--------+----+------+
|comm|deptno|ename|  hiredate|     job| mgr|   sal|
+----+------+-----+----------+--------+----+------+
|null|    20|SMITH|1980-12-17|   CLERK|7902| 800.0|
| 300|    30|ALLEN|1981-02-20|SALESMAN|7698|1600.5|
| 500|    30| WARD|1981-02-22|SALESMAN|7698|1250.0|
+----+------+-----+----------+--------+----+------+
only showing top 3 rows

limit

scala> emp.limit(3)
res26: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [comm: bigint, deptno: bigint ... 6 more fields]

scala> emp.limit(3).show
+----+------+-----+-----+----------+--------+----+------+
|comm|deptno|empno|ename|  hiredate|     job| mgr|   sal|
+----+------+-----+-----+----------+--------+----+------+
|null|    20| 7369|SMITH|1980-12-17|   CLERK|7902| 800.0|
| 300|    30| 7499|ALLEN|1981-02-20|SALESMAN|7698|1600.5|
| 500|    30| 7521| WARD|1981-02-22|SALESMAN|7698|1250.0|
+----+------+-----+-----+----------+--------+----+------+

orderBy & sort

scala> emp.orderBy(-col("sal")).show(3)
+----+------+-----+-----+----------+---------+----+------+
|comm|deptno|empno|ename|  hiredate|      job| mgr|   sal|
+----+------+-----+-----+----------+---------+----+------+
|null|    10| 7839| KING|1981-11-17|PRESIDENT|null|5000.0|
|null|    20| 7788|SCOTT|1987-04-19|  ANALYST|7566|3000.0|
|null|    20| 7902| FORD|1981-12-02|  ANALYST|7566|3000.0|
+----+------+-----+-----+----------+---------+----+------+
only showing top 3 rows


scala> emp.sort(-col("sal")).show(3)
+----+------+-----+-----+----------+---------+----+------+
|comm|deptno|empno|ename|  hiredate|      job| mgr|   sal|
+----+------+-----+-----+----------+---------+----+------+
|null|    10| 7839| KING|1981-11-17|PRESIDENT|null|5000.0|
|null|    20| 7788|SCOTT|1987-04-19|  ANALYST|7566|3000.0|
|null|    20| 7902| FORD|1981-12-02|  ANALYST|7566|3000.0|
+----+------+-----+-----+----------+---------+----+------+
only showing top 3 rows

sortWithinPartitions

scala> emp.repartition(2).sortWithinPartitions("sal").show
+----+------+-----+------+----------+---------+----+------+
|comm|deptno|empno| ename|  hiredate|      job| mgr|   sal|
+----+------+-----+------+----------+---------+----+------+
|null|    20| 7369| SMITH|1980-12-17|    CLERK|7902| 800.0|
|null|    20| 7876| ADAMS|1987-05-23|    CLERK|7788|1100.0|
|1400|    30| 7654|MARTIN|1981-09-28| SALESMAN|7698|1250.0|
| 500|    30| 7521|  WARD|1981-02-22| SALESMAN|7698|1250.0|
|null|    10| 7934|MILLER|1982-01-23|    CLERK|7369|1300.0|
|null|    20| 7566| JONES|1981-04-02|  MANAGER|7839|2975.0|
|null|    20| 7788| SCOTT|1987-04-19|  ANALYST|7566|3000.0|
|null|    30| 7900| JAMES|1981-12-03|    CLERK|7698| 950.0|
|   0|    30| 7844|TURNER|1981-09-08| SALESMAN|7698|1500.0|
| 300|    30| 7499| ALLEN|1981-02-20| SALESMAN|7698|1600.5|
|null|    10| 7782| CLARK|1981-06-09|  MANAGER|7839|2450.0|
|null|    30| 7698| BLAKE|1981-05-01|  MANAGER|7839|2850.0|
|null|    20| 7902|  FORD|1981-12-02|  ANALYST|7566|3000.0|
|null|    10| 7839|  KING|1981-11-17|PRESIDENT|null|5000.0|
+----+------+-----+------+----------+---------+----+------+

groupBy
scala> emp.groupBy(“deptno”).count.show

+------+-----+
|deptno|count|
+------+-----+
|    10|    3|
|    30|    6|
|    20|    5|
+------+-----+

cube

scala> emp.cube("deptno","job").sum("sal").show()
+------+---------+--------+
|deptno|      job|sum(sal)|
+------+---------+--------+
|    20|  MANAGER|  2975.0|
|  null|PRESIDENT|  5000.0|
|  null|     null| 29025.5|
|    10|PRESIDENT|  5000.0|
|  null| SALESMAN|  5600.5|
|    30|    CLERK|   950.0|
|    10|     null|  8750.0|
|    20|    CLERK|  1900.0|
|  null|  ANALYST|  6000.0|
|    30| SALESMAN|  5600.5|
|    20|     null| 10875.0|
|    10|    CLERK|  1300.0|
|  null|  MANAGER|  8275.0|
|    30|     null|  9400.5|
|  null|    CLERK|  4150.0|
|    20|  ANALYST|  6000.0|
|    30|  MANAGER|  2850.0|
|    10|  MANAGER|  2450.0|
+------+---------+--------+

rollup

scala> emp.rollup("deptno","job").sum("sal").show()
+------+---------+--------+
|deptno|      job|sum(sal)|
+------+---------+--------+
|    20|  MANAGER|  2975.0|
|  null|     null| 29025.5|
|    10|PRESIDENT|  5000.0|
|    30|    CLERK|   950.0|
|    10|     null|  8750.0|
|    20|    CLERK|  1900.0|
|    30| SALESMAN|  5600.5|
|    20|     null| 10875.0|
|    10|    CLERK|  1300.0|
|    30|     null|  9400.5|
|    20|  ANALYST|  6000.0|
|    30|  MANAGER|  2850.0|
|    10|  MANAGER|  2450.0|
+------+---------+--------+

max

scala> emp.groupBy("deptno").max("sal").show()
+------+--------+
|deptno|max(sal)|
+------+--------+
|    10|  5000.0|
|    30|  2850.0|
|    20|  3000.0|
+------+--------+

distinct

scala> emp.distinct.show(3)
+----+------+-----+-----+----------+-------+----+------+
|comm|deptno|empno|ename|  hiredate|    job| mgr|   sal|
+----+------+-----+-----+----------+-------+----+------+
|null|    20| 7876|ADAMS|1987-05-23|  CLERK|7788|1100.0|
|null|    20| 7902| FORD|1981-12-02|ANALYST|7566|3000.0|
|null|    20| 7788|SCOTT|1987-04-19|ANALYST|7566|3000.0|
+----+------+-----+-----+----------+-------+----+------+
only showing top 3 rows

dropDuplicates

scala> emp.dropDuplicates("deptno").show
+----+------+-----+-----+----------+--------+----+------+
|comm|deptno|empno|ename|  hiredate|     job| mgr|   sal|
+----+------+-----+-----+----------+--------+----+------+
|null|    10| 7782|CLARK|1981-06-09| MANAGER|7839|2450.0|
| 300|    30| 7499|ALLEN|1981-02-20|SALESMAN|7698|1600.5|
|null|    20| 7369|SMITH|1980-12-17|   CLERK|7902| 800.0|
+----+------+-----+-----+----------+--------+----+------+

union

scala> emp.limit(1).union(emp.limit(1)).show
+----+------+-----+-----+----------+-----+----+-----+
|comm|deptno|empno|ename|  hiredate|  job| mgr|  sal|
+----+------+-----+-----+----------+-----+----+-----+
|null|    20| 7369|SMITH|1980-12-17|CLERK|7902|800.0|
|null|    20| 7369|SMITH|1980-12-17|CLERK|7902|800.0|
+----+------+-----+-----+----------+-----+----+-----+

join

scala> emp.join(emp,"ename").show
+------+----+------+-----+----------+---------+----+------+----+------+-----+----------+---------+----+------+
| ename|comm|deptno|empno|  hiredate|      job| mgr|   sal|comm|deptno|empno|  hiredate|      job| mgr|   sal|
+------+----+------+-----+----------+---------+----+------+----+------+-----+----------+---------+----+------+
| SMITH|null|    20| 7369|1980-12-17|    CLERK|7902| 800.0|null|    20| 7369|1980-12-17|    CLERK|7902| 800.0|
| ALLEN| 300|    30| 7499|1981-02-20| SALESMAN|7698|1600.5| 300|    30| 7499|1981-02-20| SALESMAN|7698|1600.5|
|  WARD| 500|    30| 7521|1981-02-22| SALESMAN|7698|1250.0| 500|    30| 7521|1981-02-22| SALESMAN|7698|1250.0|
| JONES|null|    20| 7566|1981-04-02|  MANAGER|7839|2975.0|null|    20| 7566|1981-04-02|  MANAGER|7839|2975.0|
|MARTIN|1400|    30| 7654|1981-09-28| SALESMAN|7698|1250.0|1400|    30| 7654|1981-09-28| SALESMAN|7698|1250.0|
| BLAKE|null|    30| 7698|1981-05-01|  MANAGER|7839|2850.0|null|    30| 7698|1981-05-01|  MANAGER|7839|2850.0|
| CLARK|null|    10| 7782|1981-06-09|  MANAGER|7839|2450.0|null|    10| 7782|1981-06-09|  MANAGER|7839|2450.0|
| SCOTT|null|    20| 7788|1987-04-19|  ANALYST|7566|3000.0|null|    20| 
+------+----+------+-----+----------+---------+----+------+----+------+-----+----------+---------+----+------+

用多个字段连接.不一定要包装在Seq中,List,Array也可以

scala> emp.join(emp,Seq("ename","deptno")).show
+------+------+----+-----+----------+---------+----+------+----+-----+----------+---------+----+------+
| ename|deptno|comm|empno|  hiredate|      job| mgr|   sal|comm|empno|  hiredate|      job| mgr|   sal|
+------+------+----+-----+----------+---------+----+------+----+-----+----------+---------+----+------+
| SMITH|    20|null| 7369|1980-12-17|    CLERK|7902| 800.0|null| 7369|1980-12-17|    CLERK|7902| 800.0|
| ALLEN|    30| 300| 7499|1981-02-20| SALESMAN|7698|1600.5| 300| 7499|1981-02-20| SALESMAN|7698|1600.5|
|  WARD|    30| 500| 7521|1981-02-22| SALESMAN|7698|1250.0| 500| 7521|1981-02-22| SALESMAN|7698|1250.0|
| JONES|    20|null| 7566|1981-04-02|  MANAGER|7839|2975.0|null| 7566|1981-04-02|  MANAGER|7839|2975.0|
|MARTIN|    30|1400| 7654|1981-09-28| SALESMAN|7698|1250.0|1400| 7654|1981-09-28| SALESMAN|7698|1250.0|
| BLAKE|    30|null| 7698|1981-05-01|  MANAGER|7839|2850.0|null| 
+------+------+----+-----+----------+---------+----+------+----+-----+----------+---------+----+------+

内连接

scala> emp.join(emp,Array("ename","deptno"),"inner").show(3)
+-----+------+----+-----+----------+--------+----+------+----+-----+----------+--------+----+------+
|ename|deptno|comm|empno|  hiredate|     job| mgr|   sal|comm|empno|  hiredate|     job| mgr|   sal|
+-----+------+----+-----+----------+--------+----+------+----+-----+----------+--------+----+------+
|SMITH|    20|null| 7369|1980-12-17|   CLERK|7902| 800.0|null| 7369|1980-12-17|   CLERK|7902| 800.0|
|ALLEN|    30| 300| 7499|1981-02-20|SALESMAN|7698|1600.5| 300| 7499|1981-02-20|SALESMAN|7698|1600.5|
| WARD|    30| 500| 7521|1981-02-22|SALESMAN|7698|1250.0| 500| 7521|1981-02-22|SALESMAN|7698|1250.0|
+-----+------+----+-----+----------+--------+----+------+----+-----+----------+--------+----+------+

换种写法

scala> emp.join(emp,emp("ename")===emp("ename")).show(3)
20/11/24 21:27:48 WARN sql.Column: Constructing trivially true equals predicate, 'ename#79 = ename#79'. Perhaps you need to use aliases.
+----+------+-----+-----+----------+--------+----+------+----+------+-----+-----+----------+--------+----+------+
|comm|deptno|empno|ename|  hiredate|     job| mgr|   sal|comm|deptno|empno|ename|  hiredate|     job| mgr|   sal|
+----+------+-----+-----+----------+--------+----+------+----+------+-----+-----+----------+--------+----+------+
|null|    20| 7369|SMITH|1980-12-17|   CLERK|7902| 800.0|null|    20| 7369|SMITH|1980-12-17|   CLERK|7902| 800.0|
| 300|    30| 7499|ALLEN|1981-02-20|SALESMAN|7698|1600.5| 300|    30| 7499|ALLEN|1981-02-20|SALESMAN|7698|1600.5|
| 500|    30| 7521| WARD|1981-02-22|SALESMAN|7698|1250.0| 500|    30| 7521| WARD|1981-02-22|SALESMAN|7698|1250.0|
+----+------+-----+-----+----------+--------+----+------+----+------+-----+-----+----------+--------+----+------+

求交集

scala> emp.intersect(emp.limit(1)).show()
+----+------+-----+-----+----------+-----+----+-----+
|comm|deptno|empno|ename|  hiredate|  job| mgr|  sal|
+----+------+-----+-----+----------+-----+----+-----+
|null|    20| 7369|SMITH|1980-12-17|CLERK|7902|800.0|
+----+------+-----+-----+----------+-----+----+-----+

一个有而另一个没有

scala> emp.except(emp.limit(1)).show()
+----+------+-----+------+----------+---------+----+------+
|comm|deptno|empno| ename|  hiredate|      job| mgr|   sal|
+----+------+-----+------+----------+---------+----+------+
|null|    30| 7900| JAMES|1981-12-03|    CLERK|7698| 950.0|
|null|    20| 7902|  FORD|1981-12-02|  ANALYST|7566|3000.0|
|1400|    30| 7654|MARTIN|1981-09-28| SALESMAN|7698|1250.0|
| 300|    30| 7499| ALLEN|1981-02-20| SALESMAN|7698|1600.5|
|null|    10| 7782| CLARK|1981-06-09|  MANAGER|7839|2450.0|
|null|    20| 7788| SCOTT|1987-04-19|  ANALYST|7566|3000.0|
| 500|    30| 7521|  WARD|1981-02-22| SALESMAN|7698|1250.0|
|null|    10| 7934|MILLER|1982-01-23|    CLERK|7369|1300.0|
|null|    10| 7839|  KING|1981-11-17|PRESIDENT|null|5000.0|
|   0|    30| 7844|TURNER|1981-09-08| SALESMAN|7698|1500.0|
|null|    30| 7698| BLAKE|1981-05-01|  MANAGER|7839|2850.0|
|null|    20| 7566| JONES|1981-04-02|  MANAGER|7839|2975.0|
|null|    20| 7876| ADAMS|1987-05-23|    CLERK|7788|1100.0|
+----+------+-----+------+----------+---------+----+------+

重命名

scala> emp.withColumnRenamed("deptno",  "no").show(3)
+----+---+-----+-----+----------+--------+----+------+
|comm| no|empno|ename|  hiredate|     job| mgr|   sal|
+----+---+-----+-----+----------+--------+----+------+
|null| 20| 7369|SMITH|1980-12-17|   CLERK|7902| 800.0|
| 300| 30| 7499|ALLEN|1981-02-20|SALESMAN|7698|1600.5|
| 500| 30| 7521| WARD|1981-02-22|SALESMAN|7698|1250.0|
+----+---+-----+-----+----------+--------+----+------+

新增1列

scala> emp.withColumn("salary2",emp("sal")).show(2)
+----+------+-----+-----+----------+--------+----+------+-------+
|comm|deptno|empno|ename|  hiredate|     job| mgr|   sal|salary2|
+----+------+-----+-----+----------+--------+----+------+-------+
|null|    20| 7369|SMITH|1980-12-17|   CLERK|7902| 800.0|  800.0|
| 300|    30| 7499|ALLEN|1981-02-20|SALESMAN|7698|1600.5| 1600.5|
+----+------+-----+-----+----------+--------+----+------+-------+

两种风格的写法

DSL风格写法

//降序排列
val df1: Unit = df.select("empno","ename","deptno")
    .orderBy($"deptno".desc).show()

SQL风格写法

 df.createTempView("emp")
  val sql="select * from emp where deptno<30"
  private val frame: DataFrame = spark.sql(sql.stripMargin)
  frame.show()
  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值