import org.apache.spark.{SparkConf, SparkContext}

import java.text.SimpleDateFormat
import java.util.Date
import scala.util.Random

object RDDparallelizeSaveAsFile {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("RDDparallelize").setMaster("local")
    val spark = new SparkContext(conf)

    var startTime = System.currentTimeMillis()

    // Number of records to generate; defaults to 10,000,000 when no argument is given.
    val count = if (args.length > 0) args(0).toInt else 10000000

    // Generate `count` random integers in [0, 1000), count how often each value
    // occurs, and sort the (value, count) pairs by count in ascending order.
    val data = spark.parallelize(1 to count)
      .map(_ => new Random().nextInt(1000))
      .map(num => (num, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2)

    // Note: RDD transformations are lazy, so this interval mostly measures building
    // the lineage; the actual computation runs when saveAsTextFile is called below.
    var endTime = System.currentTimeMillis()
    println("compute:" + (endTime - startTime) + "ms")

    // Build a timestamped output directory on HDFS so repeated runs do not collide.
    val iString = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date())
    val soutput = "hdfs://hadoop102:9000/output/" + iString
    println(soutput)

    startTime = System.currentTimeMillis()
    data.saveAsTextFile(soutput)
    endTime = System.currentTimeMillis()
    println("saveAsTextFile:" + (endTime - startTime) + "ms")

    spark.stop()
  }
}
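
The job prints the timestamped HDFS directory before writing, and each saved line is the string form of a (value, count) pair. A minimal sketch for spot-checking the saved output, assuming a live SparkContext named spark as above and the same namenode hadoop102:9000; the timestamp segment in the path is a hypothetical placeholder, substitute the directory actually printed by the run:

    // Read the saved (value, count) lines back and print a small sample.
    // The timestamp segment is hypothetical; use the directory printed by the job.
    val saved = spark.textFile("hdfs://hadoop102:9000/output/20230101120000000")
    saved.take(10).foreach(println)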