$ bin/spark-shell --master local[4]
scala> spark.
baseRelationToDataFrame conf emptyDataFrame implicits range sparkContext stop time
catalog createDataFrame emptyDataset listenerManager read sql streams udf
close createDataset experimental newSession readStream sqlContext table version
scala> spark.conf
18/03/19 15:22:48 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
res0: org.apache.spark.sql.RuntimeConfig = org.apache.spark.sql.RuntimeConfig@4138af7
// global_temp 是存放全局临时视图的数据库,其中的视图可以跨 session 使用
scala> spark.read.json("examples/src/main/resources/people.json")
org.apache.spark.sql.AnalysisException:
Path does not exist: hdfs://xxxxxxx1:8020/user/YYYYYYYYY/examples/src/main/resources/people.json;
// 系统在.sparkStaging所在的目录搜寻examples/目录
scala> spark.read.json("file:///examples/src/main/resources/people.json")
org.apache.spark.sql.AnalysisException: Path does not exist: file:/examples/src/main/resources/people.json;
......
// 不在$SPARK_HOME目录
scala> spark.read.json("file:///$SPARK_HOME/examples/src/main/resources/people.json")
org.apache.spark.sql.AnalysisException: Path does not exist: file:/$SPARK_HOME/examples/src/main/resources/people.json;
......
// 不识别$SPARK_HOME,毕竟不是同一个shell……愚蠢的尝试!
scala> spark.read.json("file:opt/bigdata/nfs/spark-2.1.2-bin-hadoop2.7/examples/src/main/resources/people.json")
res4: org.apache.spark.sql.DataFrame = [age: bigint, name: string]
// 终于对了,json返回的是DataFrame
scala> res4.show
+----+-------+
| age| name|
+----+-------+
|null|Michael|
| 30| Andy|
| 19| Justin|
+----+-------+
scala> val df =res4
df: org.apache.spark.sql.DataFrame = [age: bigint, name: string]
scala> df.printSchema
root
|-- age: long (nullable = true)
|-- name: string (nullable = true)
scala> df.select("name").show
+-------+
| name|
+-------+
|Michael|
| Andy|
| Justin|
+-------+
// DF.select("[column_name]")
scala> df.select("name", "age").show
+-------+----+
|   name| age|
+-------+----+
|Michael|null|
|   Andy|  30|
| Justin|  19|
+-------+----+
// 可以多选几列
scala> df.select($"name", $"a