Spark 读取 HDFS 上的多目录 Parquet 文件

package mcd.etl.cosmos
import java.io.{File, PrintWriter}
import java.net.URI
import org.apache.hadoop.conf.{Configurable, Configuration}
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import scala.math.BigDecimal
import org.apache.hadoop.fs.FileSystem
/**
 * Batch job that walks every file below `/input/dataCount` on HDFS, reads each one as Parquet,
 * keeps the rows whose `time` column matches the `150000000` filter, and writes one CSV per
 * input file containing the row-over-row change of the `close` and `amount` columns.
 *
 * The Spark master (`local[*]`), the HDFS URI and the output directory (`E:/lastdata/`) are
 * hard-coded.
 */
object readParqutYearsAll {

  val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("readParqutYearsAll")
  val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()

  /**
   * Entry point: recursively lists `/input/dataCount` and hands every data file to [[process]].
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Named differently from the object-level SparkConf `conf` to avoid shadowing it.
    val hadoopConf = new Configuration
    // newInstance returns a FileSystem owned by this caller, so it is closed in `finally`.
    val fs = FileSystem.newInstance(URI.create("hdfs://nn1:8020"), hadoopConf)
    try {
      val it = fs.listFiles(new Path("/input/dataCount"), true)
      while (it.hasNext) {
        val p: Path = it.next().getPath
        val name = p.getName
        // A recursive listing also returns marker/metadata files such as `_SUCCESS` or
        // `.part-0.crc`; they are not Parquet and would make the read fail.
        if (!name.startsWith("_") && !name.startsWith(".")) process(p)
      }
    } finally fs.close()
  }

  /**
   * Reads one Parquet file and writes `E:/lastdata/<id>-close-amount.csv`, where `<id>` is the
   * text between the first `sh.` in the path and the following `/`.
   *
   * Each CSV line describes one row relative to the previous row (ordered by `date`):
   * `date`, difference of `close`, percentage change of `close`, difference of `amount`,
   * percentage change of `amount`. Note that the `close` / `amount` header columns therefore
   * hold differences, not the raw values. The first row has no predecessor and is not written.
   *
   * @param p fully qualified path of the Parquet file; must contain `sh.`
   */
  def process(p: Path): Unit = {
    val df = spark.read.parquet(p.toString)
    df.createOrReplaceTempView("ds")
    val closing = spark.sql(
      "select date,time,code,open,high,low,close,volume,amount,adjustflag " +
        "from ds where substring(time,9,17) = 150000000 order by date")
    closing.show(10)

    val rows = closing.collect()
    // Derived before the writer is opened so a malformed path cannot leak a file handle.
    val pathString = p.toString.split("sh\\.")(1).split("/")(0)
    val writer = new PrintWriter(new File("E:/lastdata/" + pathString + "-close-amount.csv"))
    try {
      writer.println("date,close,closepercent,amount,amountpercent")
      // Pair every row with its predecessor; yields nothing for zero or one row.
      for ((prev, cur) <- rows.zip(rows.drop(1))) {
        val date = cur(0)
        // Column 6 is `close`, column 8 is `amount` in the select list above.
        val closeNow = BigDecimal(cur(6).toString)
        val closePrev = BigDecimal(prev(6).toString)
        val amountNow = BigDecimal(cur(8).toString)
        val amountPrev = BigDecimal(prev(8).toString)
        val closeDiff = closeNow - closePrev
        val amountDiff = amountNow - amountPrev
        val closePct = percentChange(closeNow, closePrev)
        val amountPct = percentChange(amountNow, amountPrev)
        writer.println(s"$date,$closeDiff,$closePct,$amountDiff,$amountPct")
      }
    } finally writer.close()
  }

  /**
   * Percentage change from `prev` to `cur`, rendered for the CSV.
   * Returns an empty field when `prev` is zero, because the ratio is undefined there.
   */
  private def percentChange(cur: BigDecimal, prev: BigDecimal): String =
    if (prev.signum == 0) "" else (cur / prev * 100 - 100).toString
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值