#构造case class,利用反射机制隐式转换
scala> import spark.implicits._
scala> val rdd = sc.textFile("input/textdata.txt")
scala> case class Person(id:Int,name:String)
scala> val df = rdd.map(_.split(",")).map(x=>Person(x(0).toInt,x(1))).toDF
scala> df.show
+---+--------+
| id|    name|
+---+--------+
|  1|zhangsan|
|  2|    lisi|
|  3|  wangwu|
|  4| zhaoliu|
+---+--------+
#通过schema,Row构造dataframe
scala> import org.apache.spark.sql.types._
scala> import org.apache.spark.sql.Row
scala> val structFields = Array(StructField("id",IntegerType,true),StructField("name",StringType,true))
scala> val structType = StructType(structFields)  // 创建schema结构
scala> val lines = sc.textFile("input/textdata.txt")
scala> val rdd = lines.map(_.split(",")).map(x=>Row(x(0).toInt,x(1)))  // 创建RDD[Row]
scala> val df = spark.createDataFrame(rdd,structType)  // 通过RDD[Row],schema构建DataFrame
scala> df.show
+---+--------+
| id|    name|
+---+--------+
|  1|zhangsan|
|  2|    lisi|
|  3|  wangwu|
|  4| zhaoliu|
+---+--------+
文本:
cat textdata.txt
1,zhangsan
2,lisi
3,wangwu
4,zhaoliu