RDD动态转换成DataFrame

java版本:
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

import java.util.ArrayList;
import java.util.List;

/**
* Created by rong on 2016/3/19.
*/
public class RDD2DataFrameByProgrammatically {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName(“RDD2DataFrame”).setMaster(“local”);
JavaSparkContext sc= new JavaSparkContext(conf);

    JavaRDD<String> lines = sc.textFile("C://Users//rong//Desktop//persons.txt");
    SQLContext sqlContext = new SQLContext(sc);
    JavaRDD<Row> javaRdd = lines.map(new Function<String, Row>() {
        public Row call(String line) throws Exception {
            String[] str = line.split(",");
            return RowFactory.create(Integer.valueOf(str[0]),str[1],Integer.valueOf(str[2]));//Row工厂类创建row
        }
    });

    List<StructField> structFields = new ArrayList<StructField>();
    structFields.add(DataTypes.createStructField("id",DataTypes.IntegerType,true));
    structFields.add(DataTypes.createStructField("name",DataTypes.StringType,true));
    structFields.add(DataTypes.createStructField("age",DataTypes.IntegerType,true));

    StructType structType = DataTypes.createStructType(structFields);

    DataFrame df = sqlContext.createDataFrame(javaRdd,structType);
    df.registerTempTable("persons");
    df.show();

   DataFrame dfs =  sqlContext.sql("select * from persons where age > 6");
    JavaRDD<Row> rows = dfs.javaRDD();
    List<Row> list = rows.collect();
    for(Row row : list){
        System.out.println(row);
    }
}

}

scala版本:
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.{SparkContext, SparkConf}

import scala.io.Source

/**
* Created by rong on 2016/3/20.
*/
/**
 * Converts an RDD to a DataFrame with a programmatically-built schema
 * (Spark 1.x Scala API). The field names are read from a text file at
 * runtime, so the schema is not known at compile time.
 *
 * Created by rong on 2016/3/20.
 */
object RDD2DataFrameByProgrammatically2 {

  def main(args: Array[String]) {
    // BUG FIX: the original used curly "smart" quotes around the string
    // literals here, which does not compile; replaced with ASCII quotes.
    val conf = new SparkConf().setMaster("local").setAppName("RDD2DataFrameByProgrammatically2")
    val sc = new SparkContext(conf)

    val sqlContext = new SQLContext(sc) // use HiveContext in production

    val persons = sc.textFile("C://Users//rong//Desktop//text//persons.txt")

    // Build the schema from a file of whitespace-separated field names.
    // BUG FIX: close the Source when done — the original leaked the handle.
    val fieldSource = Source.fromFile("C://Users//rong//Desktop//text//fields.txt")
    val schemaString =
      try fieldSource.getLines().mkString(" ")
      finally fieldSource.close()
    val structType =
      StructType(schemaString.split("\\s+").map(fieldName => StructField(fieldName, StringType, true)))

    // BUG FIX: the original captured a shared `var rowPerson` in the map
    // closure and grew it with Row.merge, so every emitted row accumulated
    // the fields of ALL previously processed records (and the mutation is
    // not safe across partitions). Build each Row from its own line instead.
    val rowRdd = persons.map(line => Row.fromSeq(line.split(",")))

    val df = sqlContext.createDataFrame(rowRdd, structType)
    df.registerTempTable("persons")
    df.show()
    df.select("name").show() // same as df.select(df.col("name")).show()

    // NOTE(review): all columns are StringType here, so `age > 6` relies on
    // Spark's implicit cast of the string column — confirm the data is numeric.
    val results = sqlContext.sql("select * from persons where age > 6")

    results.map(x => x(0) + " " + x(1)).collect().foreach(println)

    // BUG FIX: stop the context on exit (the original never released it).
    sc.stop()
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值