Scala 学习

import org.apache.spark.sql._
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable.ArrayBuffer
import scala.io.Source

 

/** Builds a small Spark DataFrame from the first lines of a local CSV file and prints it.
  *
  * The first CSV column is kept as a string (`EventId`); every remaining column is parsed
  * with `toDouble`, so a header line or any non-numeric value in those columns makes the
  * program fail with a `NumberFormatException`.
  */
object DataFrameCreate {

  /** Location of the CSV file to load. */
  private val InputPath = "D:\\buffer\\data\\train.csv"

  /** Maximum number of lines read from the input file. */
  private val MaxRows = 10

  /** Name of the single string-typed column (first CSV field). */
  private val StringColumn = "EventId"

  /** Comma-separated names of the Double-typed columns, in CSV order.
    * Surrounding whitespace/newlines are stripped when the schema is built.
    */
  private val DoubleColumns = """
                       DER_ma11_MMC
                          ,DER_ma11_tran1ver1e_met_lep
                          ,DER_ma11_vi1
                          ,DER_pt_h
                          ,DER_deltaeta_jet_jet
                          ,DER_ma11_jet_jet
                          ,DER_prodeta_jet_jet
                          ,DER_deltar_tau_lep
                          ,DER_pt_tot
                          ,DER_1um_pt
                          ,DER_pt_ratio_lep_tau
                          ,DER_met_phi_centrality
                          ,DER_lep_eta_centrality
                          ,PRI_tau_pt
                          ,PRI_tau_eta
                          ,PRI_tau_phi
                          ,PRI_lep_pt
                          ,PRI_lep_eta
                          ,PRI_lep_phi
                          ,PRI_met
                          ,PRI_met_phi
                          ,PRI_met_1umet
                          ,PRI_jet_num
                          ,PRI_jet_leading_pt
                          ,PRI_jet_leading_eta
                          ,PRI_jet_leading_phi
                          ,PRI_jet_1u0leading_pt
                          ,PRI_jet_1u0leading_eta
                          ,PRI_jet_1u0leading_phi
                          ,PRI_jet_all_pt
                          ,Weight
        ,Label
                          """

  /** Entry point: reads up to [[MaxRows]] rows, wraps them in a DataFrame and prints each row.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Spark Pi").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)
      val inputRDD = sc.parallelize(readRows(InputPath, MaxRows))
      val df = sqlContext.createDataFrame(inputRDD, buildSchema())
      df.collect().foreach(println)
    } finally {
      // Always release the local Spark runtime, even when reading or parsing fails.
      sc.stop()
    }
  }

  /** Reads at most `limit` lines from `path` and converts each one into a Row. */
  private def readRows(path: String, limit: Int): List[Row] = {
    val file = Source.fromFile(path)
    try {
      // `take` stops pulling lines once the limit is reached; `toList` materialises
      // the rows before the underlying file is closed.
      file.getLines().take(limit).map(parseRow).toList
    } finally {
      file.close()
    }
  }

  /** Parses one CSV line: first field stays a String, all following fields become Doubles. */
  private def parseRow(line: String): Row = {
    val fields = line.trim.split(",").toList.map(_.trim)
    val values: List[Any] = fields.head :: fields.tail.map(_.toDouble)
    Row(values: _*)
  }

  /** Builds the schema: one nullable String column followed by the nullable Double columns. */
  private def buildSchema(): StructType = {
    val idField = StructField(StringColumn, StringType, nullable = true)
    // Trim each name so the layout whitespace of the literal does not leak into column names.
    val doubleFields = DoubleColumns.split(",").toList
      .map(name => StructField(name.trim, DoubleType, nullable = true))
    StructType(idField :: doubleFields)
  }
}

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值