- package cn.itcast.spark.sql
-
- import org.apache.spark.sql.{Row, SQLContext}
- import org.apache.spark.sql.types._
- import org.apache.spark.{SparkContext, SparkConf}
-
-
-
-
object SpecifyingSchema {

  /**
   * Builds a DataFrame from a space-delimited text file using an explicitly
   * declared schema (rather than inferring one via reflection), selects the
   * four oldest people, and writes the result out as JSON.
   *
   * @param args args(0) = input path (text file, one "id name age" per line),
   *             args(1) = output directory for the JSON result.
   */
  def main(args: Array[String]): Unit = {
    // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
    require(args.length >= 2, "Usage: SpecifyingSchema <inputPath> <outputPath>")

    val conf = new SparkConf().setAppName("SQL-2")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // Each input line is split on single spaces into Array(id, name, age).
    val personRDD = sc.textFile(args(0)).map(_.split(" "))

    // Explicit schema: column names and types are declared up front.
    val schema = StructType(
      List(
        StructField("id", IntegerType, true),
        StructField("name", StringType, true),
        StructField("age", IntegerType, true)
      )
    )

    // Convert raw String arrays into Rows matching the schema above.
    // NOTE(review): p(0).toInt / p(2).toInt throw on malformed lines — assumes clean input.
    val rowRDD = personRDD.map(p => Row(p(0).toInt, p(1).trim, p(2).toInt))

    val personDataFrame = sqlContext.createDataFrame(rowRDD, schema)

    // Register as a temp table so it can be queried with SQL (Spark 1.x API).
    personDataFrame.registerTempTable("t_person")

    // The four oldest people.
    val df = sqlContext.sql("select * from t_person order by age desc limit 4")

    df.write.json(args(1))

    sc.stop()
  }
}
将程序打成jar包,上传到spark集群,提交Spark任务
/usr/local/spark-1.5.2-bin-hadoop2.6/bin/spark-submit \
--class cn.itcast.spark.sql.SpecifyingSchema \
--master spark://node1.itcast.cn:7077 \
/root/spark-mvn-1.0-SNAPSHOT.jar \
hdfs://node1.itcast.cn:9000/person.txt \
hdfs://node1.itcast.cn:9000/out1
查看结果
hdfs dfs -cat hdfs://node1.itcast.cn:9000/out1/part-r-*