Spark: read CSV, write CSV

Read a CSV with Spark, transform each row, and write the result back out as CSV.

package daily

import handler.Transfrom
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Row, SaveMode, SparkSession}


object Data {

    def main(args: Array[String]): Unit = {
        //receive command-line arguments: input path and output path
        val Array(input, output) = args
        //val Array(input) = args
        //Spark configuration
        val conf = new SparkConf().setAppName("SparkSqlHello").setMaster("local[*]")
        //create the SparkSession
        val sc = SparkSession.builder().config(conf).getOrCreate()
        //read the input CSV
        val data = sc.read.format("com.databricks.spark.csv")
          .option("header", "true") //"true" if the first row of the CSV is a header, otherwise "false"
          //.option("inferSchema", true.toString) //automatically infer the data type of each column
          //.option("multiLine", true)
          .load(input)
        import sc.implicits._
        val frame = data.rdd.map(x => {
            //raw waveform payload plus the calibration columns, all read as strings
            val str = x.getAs[String]("Data")
            val ChV0 = x.getAs[String]("ChV0").toFloat
            val ChV1 = x.getAs[String]("ChV1").toFloat
            val ChAD0 = x.getAs[String]("ChAD0").toFloat
            val ChAD1 = x.getAs[String]("ChAD1").toFloat
            val ZeroV = x.getAs[String]("ZeroV").toFloat
            val Sensitivity = x.getAs[String]("Sensitivity").toFloat
            //drop the 2-character prefix and decode the payload into raw integer samples via the author's Transfrom helper
            val straaa = str.substring(2)
            val ints = Transfrom.sixJieXi(straaa)
            //split the samples into 8 blocks of 1000 (seven splitAt calls leave the last block in tuple7._2)
            val tuple = ints.splitAt(1000)
            val tuple2 = tuple._2.splitAt(1000)
            val tuple3 = tuple2._2.splitAt(1000)
            val tuple4 = tuple3._2.splitAt(1000)
            val tuple5 = tuple4._2.splitAt(1000)
            val tuple6 = tuple5._2.splitAt(1000)
            val tuple7 = tuple6._2.splitAt(1000)

            //convert each 1000-sample block into engineering units, one comma-separated string per block
            val str1 = jisuan(tuple._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str2 = jisuan(tuple2._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str3 = jisuan(tuple3._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str4 = jisuan(tuple4._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str5 = jisuan(tuple5._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str6 = jisuan(tuple6._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str7 = jisuan(tuple7._1, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)
            val str8 = jisuan(tuple7._2, ChV1, ChV0, ChAD1, ChAD0, ZeroV, Sensitivity)

            //assemble the output row: the original 22 columns plus the 8 engineering-unit strings
            new DataLogs(
                x.getAs("DataID"),
                x.getAs("DeviceNo"),
                x.getAs("Sensitivity"),
                x.getAs("Factor"),
                x.getAs("ChV0"),
                x.getAs("ChV1"),
                x.getAs("ChAD0"),
                x.getAs("ChAD1"),
                x.getAs("ZeroV"),
                x.getAs("TrigTime"),
                x.getAs("TrigTimeMs"),
                x.getAs("CollTime"),
                x.getAs("CollTimeMs"),
                x.getAs("LastCollTime"),
                x.getAs("Speed"),
                x.getAs("Overall"),
                x.getAs("SampleNumber"),
                x.getAs("UnitID"),
                x.getAs("DataLen"),
                x.getAs("Data"),
                x.getAs("DefBufLen"),
                x.getAs("DefBuf"),
                str1,
                str2,
                str3,
                str4,
                str5,
                str6,
                str7,
                str8
            )
        }).toDF("DataID", "DeviceNo", "Sensitivity", "Factor", "ChV0", "ChV1", "ChAD0", "ChAD1", "ZeroV", "TrigTime", "TrigTimeMs", "CollTime", "CollTimeMs", "LastCollTime", "Speed", "Overall", "SampleNumber", "UnitID", "DataLen", "Data", "DefBufLen", "DefBuf", "engineering1", "engineering2", "engineering3", "engineering4", "engineering5", "engineering6", "engineering7", "engineering8")
        frame.coalesce(1).write.mode(SaveMode.Append).format("com.databricks.spark.csv")
          .option("header", "true") //write a header row in the output CSV
          // .option("delimiter",",") //the default delimiter is ","
          .save(output) //save() keeps the format set above; .csv(output) would reset it to the built-in csv source

        sc.stop()
    }

    //maps each raw sample linearly from the ADC range (ChAD0..ChAD1) to the voltage range (ChV0..ChV1),
    //removes the zero offset, divides by the sensitivity, and joins the 4-decimal results with commas
    def jisuan(array: Array[Int], ChV1: Float, ChV0: Float, ChAD1: Float, ChAD0: Float, ZeroV: Float, Sensitivity: Float) = {
        array.map(xx => {
            val n = ((ChV1 - ChV0) * 1.0 / (ChAD1 - ChAD0) * (xx - ChAD0) + ChV0 - ZeroV) / Sensitivity
            n.formatted("%.4f")
        }).mkString(",")
    }
}
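The DataLogs class and the Transfrom helper come from the author's own project and are not shown in the post. For rdd.map(...).toDF(...) to compile, DataLogs has to be a Product with an Encoder, so it is presumably a case class. The sketch below is only a guess at its shape (all 30 fields as String, in the constructor order used above), not the author's actual definition:

package daily

//hypothetical reconstruction of DataLogs; field order matches the new DataLogs(...) call
//and the column list passed to toDF. All fields are String because the CSV is read
//without schema inference.
case class DataLogs(
    DataID: String, DeviceNo: String, Sensitivity: String, Factor: String,
    ChV0: String, ChV1: String, ChAD0: String, ChAD1: String, ZeroV: String,
    TrigTime: String, TrigTimeMs: String, CollTime: String, CollTimeMs: String,
    LastCollTime: String, Speed: String, Overall: String, SampleNumber: String,
    UnitID: String, DataLen: String, Data: String, DefBufLen: String, DefBuf: String,
    engineering1: String, engineering2: String, engineering3: String, engineering4: String,
    engineering5: String, engineering6: String, engineering7: String, engineering8: String)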

The point of all this is to open the output in Excel: the original Data column packs 8000 samples into a single cell, and the job spreads them across 8 cells of 1000 samples each.
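For a quick local sanity check of the split-and-convert step, without Spark or the real Transfrom.sixJieXi, a throwaway driver like the one below can be used. Every value in it is invented for illustration; the random array simply stands in for the decoded 8000-sample payload.

object DataLocalCheck {
    def main(args: Array[String]): Unit = {
        //stand-in for Transfrom.sixJieXi(...): 8000 fake ADC samples
        val samples = Array.fill(8000)(2048 + scala.util.Random.nextInt(100))
        //made-up calibration constants
        val (chV0, chV1, chAD0, chAD1, zeroV, sensitivity) = (0f, 10f, 0f, 4095f, 5f, 0.02f)

        //same 8 x 1000 grouping as the repeated splitAt(1000) calls in Data
        val blocks = samples.grouped(1000).toArray
        assert(blocks.length == 8)

        //same conversion formula as jisuan, one comma-separated string per block
        val cells = blocks.map { block =>
            block.map { xx =>
                val n = ((chV1 - chV0) * 1.0 / (chAD1 - chAD0) * (xx - chAD0) + chV0 - zeroV) / sensitivity
                n.formatted("%.4f")
            }.mkString(",")
        }
        cells.zipWithIndex.foreach { case (cell, i) =>
            println(s"engineering${i + 1}: ${cell.take(40)}...") //print the first few values of each cell
        }
    }
}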
