readDataToDf

package com.gree.test

import java.util.Properties

import com.gree.util.DmSpark
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{DataFrame, Row, SaveMode}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.json.JSONObject

/**
  * Created by 260169 on 2018/8/9.
  */
object readDataToDf {

  case class Person(name: String, age: Int)

  private val sc = DmSpark.getSC()
  private val sqlHiveContext = DmSpark.getSqlHiveContext()

  import sqlHiveContext.implicits._
  import com.databricks.spark.csv._

  /*    <!-- Reading CSV requires the following dependency -->
        <dependency>
            <groupId>com.databricks</groupId>
            <artifactId>spark-csv_2.10</artifactId>
            <version>1.5.0</version>
        </dependency>*/
  def main(args: Array[String]): Unit = {
    // Read a txt file as CSV; if it is not comma-separated, convert it to comma-separated first.
    val df1: DataFrame = sqlHiveContext.csvFile("D:\\springboot\\TestSparkModel\\src\\readme.txt")
    df1.show(false)
    // Read the data, replace its delimiters (variable-length runs of whitespace) with commas, write the result to a temporary file (directory), then read that temporary file as CSV.
    val path = new Path("D:\\springboot\\TestSparkModel\\src\\tmp")
    val fs = path.getFileSystem(new Configuration())
    if (fs.exists(path)) fs.delete(path, true)
    sc.textFile("D:\\springboot\\TestSparkModel\\src\\readme2.txt").map(_.replaceAll("\\s+", ",")).saveAsTextFile("D:\\springboot\\TestSparkModel\\src\\tmp")
    val df_readme2 = sqlHiveContext.csvFile("D:\\springboot\\TestSparkModel\\src\\tmp")
    df_readme2.show(false)
    // Read from a JSON file
    val readme1_json = sqlHiveContext.read.json("D:\\springboot\\TestSparkModel\\src\\readme1.json").show(false)
    // Read JSON from an API response string
    val jsonStr = "{\"success\":\"sucess\",\"result\":[{\"id\":\"7\",\"name\":\"文子\",\"age\":23},{\"id\":\"8\",\"name\":\"kevin\",\"age\":24},{\"id\":\"2\",\"name\":\"张思思\",\"age\":12},{\"id\":\"5\",\"name\":\"李二\",\"age\":32},{\"id\":\"1\",\"name\":\"啦啦\",\"age\":123},{\"id\":\"3\",\"name\":\"阿打算\",\"age\":21},{\"id\":\"100\",\"name\":\"撒\",\"age\":21},{\"id\":\"4\",\"name\":\"啦啦\",\"age\":32},{\"id\":\"6\",\"name\":\"李梅\",\"age\":23},{\"id\":\"9\",\"name\":\"王林\",\"age\":45},{\"id\":\"11\",\"name\":\"王立军\",\"age\":12},{\"id\":\"12\",\"name\":\"娜娜\",\"age\":21},{\"id\":\"13\",\"name\":\"赵生\",\"age\":12},{\"id\":\"3431\",\"name\":\"哈哈\",\"age\":213},{\"id\":\"34312321\",\"name\":\"哈哈\",\"age\":111}],\"error_info\":null}"

    val jsonObject: String = new JSONObject(jsonStr).getJSONArray("result").toString
    val df2: DataFrame = sqlHiveContext.read.json(sc.makeRDD(Seq(jsonObject)))
    df2.show(false)
    //df2.write.json("D:\\springboot\\TestSparkModel\\src\\readme2.json")

    val df_json: DataFrame = sqlHiveContext.read.json("D:\\springboot\\TestSparkModel\\src\\readme2.json")
    df_json.show(false)
    // Apache Parquet is a high-performance columnar storage format that can be used in any Hadoop project.
    //df2.write.format("parquet").save("D:\\springboot\\TestSparkModel\\src\\readme3.parquet")
    val df_parquet: DataFrame = sqlHiveContext.read.parquet("D:\\springboot\\TestSparkModel\\src\\readme3.parquet")
    df_parquet.show(false)

    //df2.write.format("parquet").save("D:\\springboot\\TestSparkModel\\src\\readme4")
   
    val df_parquet1 = sqlHiveContext.read.parquet("D:\\springboot\\TestSparkModel\\src\\readme4").show(false)
    //df2.write.format("parquet").mode(SaveMode.Overwrite).saveAsTable("test_yw.my_parquet")
    // ORC (Optimized Row Columnar): Impala does not support ORC, while Parquet is supported by all query engines.
    // Compression: ORC > Parquet > text; ORC files are roughly half the size of Parquet files.
    // Inserting into Hive tables: ORC is faster than Parquet; query performance is about the same for both.
    // ORC supports ACID operations to some extent, and is the format Hive generally recommends.
    // df2.write.format("orc").mode(SaveMode.Overwrite).saveAsTable("test_yw.my_orc")

    // Read data from a CSV file
    val df_csv = sqlHiveContext.read.format("com.databricks.spark.csv").option("header", "true")
      .option("inferSchema", true.toString).load("F:\\导数据\\query_results.csv")
    //.show(false)
  
    // Read a CSV file using the implicit csvFile method
    import com.databricks.spark.csv._
    val df_csv1 = sqlHiveContext.csvFile("F:\\导数据\\query_results.csv")
    df_csv1.show(false)
    df_csv1.saveAsCsvFile("D:\\springboot\\TestSparkModel\\src\\readme5.csv")

    // Convert XML to a DataFrame
    // val df_xml=sqlHiveContext.read.format("com.databricks.spark.xml").option("rowTag","book").option("attributePrefix","@").load("books.xml")
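    /*  Presumably spark-xml needs a dependency analogous to spark-csv above (coordinates assumed, verify the version):
        <dependency>
            <groupId>com.databricks</groupId>
            <artifactId>spark-xml_2.10</artifactId>
            <version>0.4.1</version>
        </dependency>*/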
    // Read from Hive tables
    val df3: DataFrame = sqlHiveContext.read.table("test_yw.student")
    df3.show(false)
    val df4 = sqlHiveContext.read.table("test_yw.my_parquet").show(false)
    val df5 = sqlHiveContext.read.table("test_yw.my_orc").show(false)
  
    // Convert a Seq, Array, or List to a DataFrame
    // From a Seq
    val dfseq1 = sqlHiveContext.createDataFrame(Seq((1, 2017, 11, 13, "zhangsan", 21), (0, 2017, 11, 14, "lisi", 18), (0, 2017, 11, 14, "lisi1", 18), (1, 2017, 11, 14, "lisi", 18), (0, 2017, 11, 14, "lisi", 18), (0, 2017, 11, 14, "lisi", 18))).toDF("sum", "year", "month", "day", "name", "age").show(false)
  
    // Convert a List to a DataFrame
    val ll = List(Person("zhangsan", 21), Person("lisi", 34))
    val df_list = sqlHiveContext.createDataFrame(ll)
    df_list.show(false)
   
    // Convert an Array to a DataFrame (via an RDD of Rows plus an explicit schema)
    var stringEn = "RatCap\nAssignedCap\nComp1AimHz\nComp1RunHz\nFan1AimHz\nFan2AimHz\nHP\nLP\nEXV1\nEXV2\nEXV1AimSuperHeat\nCondAimOutLiq\nODEnvTemp\nComp1DLT\nCondOutAirTemp\nCondOutLiqTemp\nHeatECOutAirTemp\nInspirationTemp1\nInspirationTemp2\nDef1Temp\nComp1Sta\nCallbackFRun\nFourWay1Sta\nAirBypassSta\nReOil1Sta\nComp1AidHeaterSta\n"
    stringEn = stringEn + "BottomBandHeater\nResetSta\nComp1Cur\nComp1WeakMagSta\nComp1DriModHTHzLimitSta\nComp1DriModHTHzDownSta\nComp1IPMTemp\nComp1DriPFCTemp\nCommErr\nCapCodeSetErr\nHPErr\nLPErr\nCompDLErr\nDLLowTempProtErr\nLowPresRatioErr\nHighPresRatioErr\nBoardErr\nFourWayErr\nLowHPErr\nComp1DLTErr\nODTempErr\nAirInTErr\nDef1Err\nAirOutTErr\nCondAirOutTErr\nCondLiqOutTErr\nComp1ShellErr\nHeatECOutAirTErr\nHPTErr\nLPTErr\nComp1DLErr\nComp1ShellHTProt\nPresErr\nHaltErr\nUnHaltErr\nLowRatCapRatioErr\nHighRatCapRatioErr\nReOilTErr\nLowComp1DCBusVoltErr\nHighComp1DCBusVoltErr\nComp1DriIPMErr\nComp1DriPFCErr\nComp1StartupErr\nComp1LostPhaseErr\nComp1OCErr\nComp1LostStepErr\nM2Comp1Dri1CommErr\nComp1DriIPMTempErr\nComp1DriModTempErr\nComp1DriChargeErr\nComp1SumRunTime\nComp1OnTimes\nHeatSumTime\nHeatDfRoSumTime \nCoolRoSumTime\nModabsoluteHP\nModabsoluteLP\nFanGear\nRunCapPer\nRunCap\nComp2AimHz\nComp2RunHz\nFan1RunHz\nFan2RunHz\nComp1Shell\nComp2DLT\nComp2Shell\nCondenserInTemp\nCondenserOutTemp\nModDefSta\nModFreonLackSta\nModFLargeSta\nComp2Sta\nFourWay2Sta\nComp2Cur\nRefLackErr\nComp2DLTErr\nComp1DLTConnErr\nComp1ShellConnErr\nComp2DLErr\nReOil2Sta\nCompEmerSta\nFanEmerSta\nDevEmerSta\nRefCallbackSta\nValveErr\nFan1DevSta\nFan2DevSta\nComp2SumRunTime\nComp2OnTimes\nDCMotorErr\nComp1DriResetErr\nLowComp2DCBusVoltErr\nHighComp2DCBusVoltErr\nComp2DriIPMErr\nComp2StartupErr\nComp2DriResetErr\nComp2OCErr\nComp2LostStepErr\nM2Comp2Dri1CommErr\nComp2DriIPMHTempErr\nComp2DriIPMSenErr\nComp2DriChargeErr\nComp2ShellErr\nLowFan1DriDCBusVoltErr\nHighFan1DriDCBusVoltErr\nFan1DriIPMErr\nFan1StartupErr\nFan1DriResetSta\nFan1OCSta\nFan1LostStepSta\nM2Fan1Dri1CommErr\nFan1DriIPMHTempErr\nFan1DriIPMSenErr\nLowFan2DriDCBusVoltErr\nHighFan2DriDCBusVoltErr\nFan2DriIPMErr\nFan2StartupErr\nFan2DriResetErr\nFan2OCErr\nFan2LostStepErr\nM2Fan2DriverCommErr\nFan2DriHighTempSta\nFan2DriModTsor\nFan1DriModHTHzLimitSta\nFan1DriModHTDownHzSta\nFan2DriModHTHzLimitSta\nFan2DriModHTDownHzSta\nComp2DriModHTHzLimitSta\nComp2DriModHTDownHzSta\nComp1DriACPowerType\nComp1DevSta\nComp1ProgramSta\nComp1DriPowerCur\nComp1UPhaseCur\nComp1VPhaseCur\nComp1DriACVolt\nComp1DCBusVolt\nComp1DriBoxTemp\nComp1DriCurDetectErr\nCLPSumRunTime\nCHPSumRunTime\nHeatLPSumRunTime\nHeatHPSumRunTime\nWay2Sta\nPowerconSumption\nSolenoidValveA\nSolenoidValveB\nComp1DriveACVoltSta\nRunMode\nASAutoHeatRecovery\nMode3DASFHeating\nStatPresCodeAddr\nComp2AidHeaterSta\nComp1BalaValve\nComp2BalaValve\nThrottleSolenoidSta\nRefReguIn1SolenoidSta\nRefReguIn2SolenoidSta\nRefReguUpSolenoidSta\nRefReguDownSolenoidSta\nComp2DCBusVolt\nComp2IPMTemp\nFan1DCBusVolt\nFan1Cur\nFan1IPMTemp\nFan2DCBusVolt\nFan2Cur\nFan2IPMTemp\nComp2UPhaseCur\nComp2VPhaseCur\nFan1UPhaseCur\nFan1VPhaseCur\nFan2UPhaseCur\nFan2VPhaseCur\nCompDriAddrErr\nComp2ACCurErr\nComp2DriPFCErr\nComp2LostPhaseErr\nComp2DriCurDetectErr\nEmerRunCodeErr\nODUPipeErr\nComp2DLTConnErr\nComp2ShellConnErr\nPowerPhaseSeqProt\nFan1LostPhaseErr\nFan1DriCurDetectErr\n"
    stringEn = stringEn + "Fan2LostPhaseErr\nFan2DriCurDetectErr\nCapiValveSta\nFan1WeakMagSta\nFan1DriACPowerType\nFan2WeakMagSta\nFan2DriACPowerType\nComp2WeakMagSta\nComp2DriACVoltType\nFan1DriPFCTemp\nFan1DriBoxTemp\nComp2DriPFCTemp\nComp2DriBoxTemp\nDevGroDLProtZone\nComp7Cur\nFan1DriPowerCur\nFan1DriACPowerVolt\nFan2DriPowerCur\nFan2DriACPowerVolt\nComp2PowerCur\nComp2DriACPowerVolt\nComp2DevSta\nComp2ModelCode\nFlowMeterAInsFlow\nFlowMeterAVel\nFlowMeterASumFlow\nFlowMeterBInsFlow\nFlowMeterBVel\nFlowMeterBSumFlow"

    val arrayEn = stringEn.split("\n")
    val rddEn = sc.makeRDD(arrayEn)

    var stringCh = "额定容量\n本机分配能力\n压缩机1目标频率\n压缩机1运行频率\n风机1目标频率\n风机2目标频率\n模块高压\n模块低压\n室外机制热EXV\n过冷器EXV\nEXV1目标过热度\n过冷器目标液出\n室外环境温度\n压缩机1排气温度\n过冷器气出温度\n过冷器液出温度\n换热器气出温度\n汽分进管温度\n汽分出管温度\n化霜温度1\n压缩机1状态\n收氟运行\n四通阀1状态\n气旁通阀状态\n回油阀1状态\n压缩机1电加热带\n底盘电加热带\n复位完成\n压缩机1电流\n压缩机1弱磁\n压缩机1驱动模块高温限频\n压缩机1驱动模块高温降频\n压缩机1模块温度\n压缩机1驱动PFC温度\n通讯故障\n外机容量码或跳线帽设定错误\n高压保护\n低压保护\n压缩机排气温度过高保护\n排气低温保护\n压力比过低保护\n压力比过高保护\n外机主板不良\n四通阀串气保护\n高压过低保护\n压缩机1排气感温包故障\n室外环境感温包故障\n汽分进管感温包故障\n化霜感温包1故障\n汽分出管感温包故障\n过冷器气出感温包故障\n过冷器液出感温包故障\n压缩机1壳顶感温包故障\n换热器气出感温包故障\n高压传感器故障\n低压传感器故障\n压缩机1排气温度过高保护\n压缩机1壳顶高温保护\n压力异常保护\n模块停机故障\n模块不停机故障\n内外机额定容量配比过低\n内外机额定容量配比过高\n回油感温包故障\n压缩机1驱动直流母线电压过低保护\n"
    stringCh = stringCh + "压缩机1驱动直流母线电压过高保护\n压缩机1驱动IPM模块保护\n压缩机1驱动PFC保护\n压缩机1启动失败\n压缩机1缺相保护\n压缩机1过流保护\n压缩机1失步保护\n主控与压缩机1驱动通讯故障\n压缩机1驱动IPM过温保护\n压缩机1驱动模块温度传感器故障\n压缩机1驱动充电回路故障\n压缩机1累计运行时间\n压缩机1启停次数\n制热累计时间\n制热化霜回油累计次数\n制冷回油累计次数\n模块绝对高压值\n模块绝对低压值\n风机档位\n本机目标运行能力\n本机当前运行能力\n压缩机2目标频率\n压缩机2运行频率\n风机1运行频率\n风机2运行频率\n压缩机1壳顶温度\n压缩机2排气温度\n压缩机2壳顶温度\n冷凝器进管温度\n冷凝器出管温度\n模块化霜状态\n模块欠氟状态\n模块过氟状态\n压缩机2状态\n四通阀2状态\n压缩机2电流\n缺冷媒保护\n压缩机2排气感温包故障\n压缩机1排气感温包脱落\n压缩机1壳顶感温包脱落\n压缩机2高温排气保护\n回油阀2状态\n压缩机紧急运行\n风机紧急运行\n模块紧急运行状态\n冷媒回收状态\n阀门异常\n风机1设备状态\n风机2设备状态\n压缩机2累计运行时间\n压缩机2启停次数\n直流电机故障\n压缩机1驱动模块复位\n压缩机2驱动直流母线电压过低保护\n压缩机2驱动直流母线电压过高保护\n压缩机2驱动IPM模块保护\n压缩机2启动失败\n压缩机2驱动模块复位\n压缩机2过流保护\n压缩机2失步保护\n主控与压缩机2驱动通讯故障\n压缩机2驱动模块温度过高保护\n压缩机2驱动模块温度传感器故障\n压缩机2驱动充电回路故障\n压缩机2壳顶感温包故障\n外风机1驱动直流母线电压过低保护\n外风机1驱动直流母线电压过高保护\n外风机1驱动IPM模块保护\n风机1启动失败\n风机1驱动模块复位\n风机1过流保护\n风机1失步保护\n主控与外风机1驱动通讯故障\n外风机1驱动模块温度过高保护\n外风机1驱动模块温度传感器故障\n外风机2驱动直流母线电压过低保护\n外风机2驱动直流母线电压过高保护\n外风机2驱动IPM模块保护\n风机2启动失败\n风机2驱动模块复位\n风机2过流保护\n风机2失步保护\n主控与外风机2驱动通讯故障\n外风机2驱动模块温度过高保护\n外风机2驱动模块温度传感器故障\n外风机1驱动模块高温限频\n外风机1驱动模块高温降频\n外风机2驱动模块高温限频\n外风机2驱动模块高温降频\n压缩机2驱动模块高温限频\n压缩机2驱动模块高温降频\n压缩机1驱动交流输入电源类型\n压缩机1设备状态\n压缩机1工作状态\n压缩机1输入电源电流\n压缩机1U相电流\n压缩机1V相电流\n压缩机1交流输入相电压\n压缩机1母线电压\n压缩机1驱动电器盒温度\n压缩机1驱动电流检测电路故障\n制冷低负荷累积运行时间\n制冷高负荷累积运行时间\n制热低负荷累积运行时间\n制热高负荷累积运行时间\n二通阀状态\n耗电量\n电磁阀A\n电磁阀B\n"
    stringCh = stringCh + "压缩机1驱动交流电压状态\n运行优先级\n允许开启自动热回收\n3D模式允许地暖开启\n外风机静压模式\n压缩机2电加热带\n压缩机1平衡阀\n压缩机2平衡阀\n节流电磁阀状态\n冷媒调整器进1电磁阀状态\n冷媒调整器进2电磁阀状态\n冷媒调整器上出电磁阀状态\n冷媒调整器底出电磁阀状态\n压缩机2母线电压\n压缩机2模块温度\n风机1母线电压\n风机1电流\n风机1模块温度\n风机2母线电压\n风机2电流\n风机2模块温度\n压缩机2U相电流\n压缩机2V相电流\n风机1U相电流\n风机1V相电流\n风机2U相电流\n风机2V相电流\n压缩机驱动板地址错误\n压缩机2驱动交流输入电流异常保护\n压缩机2驱动PFC保护\n压缩机2缺相保护\n压缩机2驱动电流检测电路故障\n紧急运转设定错误\n外机管路异常\n压缩机2排气感温包脱落\n压缩机2壳顶感温包脱落\n电源相序保护\n风机1缺相保护\n外风机1驱动电流检测电路故障\n风机2缺相保护\n外风机2驱动电流检测电路故障\n毛细管阀状态\n风机1弱磁\n外风机1驱动交流输入电源类型\n风机2弱磁\n外风机2驱动交流输入电源类型\n压缩机2弱磁\n压缩机2驱动交流输入电源类型\n风机1驱动PFC温度\n风机1驱动电器盒温度\n压缩机2驱动PFC温度\n压缩机2驱动电器盒温度\n机组排气保护区\n压缩机7电流\n风机1驱动输入电源电流\n外风机1驱动交流输入电源电压\n风机2驱动输入电源电流\n外风机2驱动交流输入电源电压\n压缩机2驱动输入电源电流\n压缩机2驱动交流输入相电压\n压缩机2设备状态\n压缩机2型号代码\n流量计A瞬时流量\n流量计A流速\n流量计A累积流量\n流量计B瞬时流量\n流量计B流速\n流量计B累积流量"

    val arrayCh = stringCh.split("\n")
    val rddCh = sc.makeRDD(arrayCh)
    // println(arrayCh.length,arrayEn.length)
    val rdd3 = rddCh.zip(rddEn)
    val schema = StructType(Seq(
      StructField("chinese", StringType, true) //也可是IntegerType等
      , StructField("english", StringType, true)
    ))
    val rowRDD = rdd3.map { x => Row(x._1.toString, x._2.toString) }
    val kvDF = sqlHiveContext.createDataFrame(rowRDD, schema).show(500, false)

    // Read data from a MySQL database into a DataFrame
    /*    val msa_all_error_record = sqlHiveContext.read.format("jdbc").options(Map("url" -> "jdbc:mysql://localhost:3306/test1", "driver" -> "com.mysql.jdbc.Driver", "dbtable" -> "student",
          "user" -> "root", "password" -> "")).load()
       // msa_all_error_record.show(false)*/
    //    val msa_all_error_record1 = sqlHiveContext.read.format("jdbc").options(Map("url" -> "jdbc:mysql://10.2.5.52:3306/gree_details", "driver" -> "com.mysql.jdbc.Driver", "dbtable" -> "msa_all_error_record",
    //      "user" -> "greesj2b", "password" -> "*#Greesj2b2016")).load()
    //    msa_all_error_record1.show(false)
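    // A hedged sketch (the connection details and partition column below are placeholders, not from the
    // original project): large tables can be read in parallel by adding Spark's standard JDBC partitioning options.
    //    val msa_partitioned = sqlHiveContext.read.format("jdbc").options(Map(
    //      "url" -> "jdbc:mysql://localhost:3306/test1", "driver" -> "com.mysql.jdbc.Driver",
    //      "dbtable" -> "student", "user" -> "root", "password" -> "",
    //      "partitionColumn" -> "id", "lowerBound" -> "1", "upperBound" -> "10000", "numPartitions" -> "4")).load()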

    /*    // Read data from an Oracle database into a DataFrame
        val tbl_part_storage = sqlHiveContext.read.format("jdbc")
          .options(Map("url" -> "jdbc:oracle:thin:@10.1.11.192:1521:SALE",
            "dbtable" -> "tbl_part_storage100",
            "user" -> "dsjpgxt",
            "password" -> "gree2017dsjpgxt")).load()
        tbl_part_storage.show(10)

    */
  }

  def resultToMYsql(resultHive: DataFrame, modeResultTable: String): Unit = {
    val prop = new Properties()
    //------------------------------- connect to the database
    val url = "jdbc:mysql://10.2.5.52:3306/test?useUnicode=true&characterEncoding=utf8" //gree_details
    prop.setProperty("user", "root")
    prop.setProperty("password", "")
    resultHive.write.mode(SaveMode.Append).jdbc(url, modeResultTable, prop)

  }

  def resultToSQLServer(resultHive: DataFrame, modeResultTable: String): Unit = {
    val prop = new Properties()
    //------------------------------- connect to the database -----
    val url = "jdbc:sqlserver://10.1.18.42:1433"
    prop.put("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
    prop.setProperty("databaseName", "F4_Test")
    prop.setProperty("user", "f4test")
    prop.setProperty("password", "f4@123456")
    // Write the data to SQL Server
    resultHive.write.mode(SaveMode.Append).jdbc(url, modeResultTable, prop)

  }

  // Spark 2.0+ supports reading DataFrames from and writing them to an Elasticsearch cluster. In principle
  // ES queries are faster than HDFS, so it is worth considering ES instead of HDFS as Spark's storage layer
  // (see the sketch below).
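  // A minimal sketch of ES I/O, assuming the elasticsearch-spark (elasticsearch-hadoop) connector is on
  // the classpath; the node address and the index/type name are placeholders, not from the original project.
  /*
  def resultToEs(resultHive: DataFrame, indexAndType: String): Unit = {
    // Write the DataFrame into an ES index/type, e.g. "test_yw/student"
    resultHive.write
      .format("org.elasticsearch.spark.sql")
      .option("es.nodes", "10.2.5.52")
      .option("es.port", "9200")
      .mode(SaveMode.Append)
      .save(indexAndType)

    // Read the same index back into a DataFrame
    val dfFromEs: DataFrame = sqlHiveContext.read
      .format("org.elasticsearch.spark.sql")
      .load(indexAndType)
    dfFromEs.show(false)
  }
  */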









}