这里自定义了一个把字符串转换为大写的 UDF 函数。用 Scala 建表时会出现 toDF 方法无法使用(未导入隐式转换)的问题,后面通过查看源码可以知道 toDF 的正确使用方法。
正确代码
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
// Demonstrates registering an upper-casing UDF and invoking it through Spark SQL.
// Key point: `import spark.implicits._` must be in scope before `toDF` is available
// on a local Seq — without it the conversion to DataFrame does not compile.
object ScalaTest {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local")
    val spark = SparkSession
      .builder()
      .appName("test")
      .config(conf)
      .enableHiveSupport()
      .getOrCreate()

    // Brings the Seq -> DatasetHolder implicit conversion (and thus toDF/toDS) into scope.
    import spark.implicits._

    // Register the UDF under the name visible to SQL queries.
    spark.udf.register("to_uppercase", (s: String) => s.toUpperCase())

    val sample = Seq((1, "zhangsan"), (2, "lisi"), (3, "xiaoming"))
    sample.toDF("id", "name").createOrReplaceTempView("table_tmp")

    // Without an alias, the generated column header is "UDF:to_uppercase(name)".
    spark.sql("select id, to_uppercase(name) from table_tmp").show()
    // With an alias, the column is reported as "new_name".
    spark.sql("select id, to_uppercase(name) new_name from table_tmp").show()
  }
}
输出结果:
+---+----------------------+
| id|UDF:to_uppercase(name)|
+---+----------------------+
| 1| ZHANGSAN|
| 2| LISI|
| 3| XIAOMING|
+---+----------------------+
+---+--------+
| id|new_name|
+---+--------+
| 1|ZHANGSAN|
| 2| LISI|
| 3|XIAOMING|
+---+--------+
toDF()源码介绍
// NOTE(review): verbatim excerpt from Apache Spark's source (org.apache.spark.sql.DatasetHolder);
// quoted here to show where toDF comes from — kept byte-identical to upstream.
/**
* A container for a [[Dataset]], used for implicit conversions in Scala.
*
* To use this, import implicit conversions in SQL:
* {{{
* val spark: SparkSession = ...
* import spark.implicits._
* }}}
*
* @since 1.6.0
*/
@InterfaceStability.Stable
case class DatasetHolder[T] private[sql](private val ds: Dataset[T]) {
// This is declared with parentheses to prevent the Scala compiler from treating
// `rdd.toDS("1")` as invoking this toDS and then apply on the returned Dataset.
def toDS(): Dataset[T] = ds
// This is declared with parentheses to prevent the Scala compiler from treating
// `rdd.toDF("1")` as invoking this toDF and then apply on the returned DataFrame.
def toDF(): DataFrame = ds.toDF()
// Varargs overload: assigns the given column names to the resulting DataFrame.
def toDF(colNames: String*): DataFrame = ds.toDF(colNames : _*)
}
再附一个关于 Scala 隐式转换讲解的文章:《Scala 隐式详解》(注:原文链接在此处缺失)。