总结
- RDD 转换成 DataFrame 使用 SQLContext 的方法 createDataFrame（示例程序使用的是 RDD 重载）:
def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame
- DataFrame转换成RDD使用DataFrame的字段 rdd:
dataFrame.rdd
举例程序
package a

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.rdd.RDD

/**
 * Demonstrates converting an RDD to a DataFrame with
 * `SQLContext.createDataFrame(rdd, beanClass)` using the JavaBean-style
 * class [[Emp]], then querying the result through Spark SQL.
 */
object Test {

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("adaa")
    val sc = new SparkContext(sparkConf)
    try {
      val sqlcontext = new SQLContext(sc)

      // Each input line is "empno ename sal deptno"; parse one Emp per line.
      // `map` (not `flatMap` + one-element Array) is the right operation for
      // a strict 1:1 transformation.
      val rdd: RDD[Emp] = sc.parallelize(Array(
        "1 zhangsan 3000 20",
        "2 lisi 4000 10",
        "3 wangwu 8000 20",
        "4 zhaoliu 1000 20"
      )).map { line =>
        val arr = line.split("\\s+")
        Emp(arr(0).toInt, arr(1), arr(2).toInt, arr(3).toInt)
      }

      // classOf[Emp] is resolved at compile time, unlike the fragile
      // stringly-typed Class.forName("a.Emp").
      val dataframe = sqlcontext.createDataFrame(rdd, classOf[Emp])
      dataframe.registerTempTable("emp")

      val newdataframe = sqlcontext.sql("select empno from emp")
      newdataframe.show()
    } finally {
      // Release local Spark resources even if the job above throws.
      sc.stop()
    }
  }
}
rdd中数据整合时所用到的类
package a

// Source rows look like: "1 zhangsan 3000 20"

/**
 * JavaBean-style employee record. The `get*` accessors are what allow the
 * bean-based `createDataFrame(rdd, beanClass)` call in [[Test]] to derive
 * a schema from this class.
 */
class Emp(empno: Int, ename: String, sal: Int, deptno: Int) {

  /** No-arg constructor (bean convention): zeroed numeric fields, null name. */
  def this() = this(0, null, 0, 0)

  def getEmpno: Int = empno
  def getEname: String = ename
  def getSal: Int = sal
  def getDeptno: Int = deptno
}

object Emp {

  /** Builds an empty record via the no-arg constructor. */
  def apply(): Emp = new Emp()

  /** Builds a fully populated record. */
  def apply(empno: Int, ename: String, sal: Int, deptno: Int): Emp =
    new Emp(empno, ename, sal, deptno)
}