// 详见注释 — see the comments below for details.
package com.beagledata.spark
import org.apache.spark.{SparkConf, SparkContext}
/**
 * 分层抽样 (Stratified sampling over PCS data).
 *
 * Created by drguo on 2017/8/21.
 * blog.csdn.net/dr_guo
 */
object PCSStratifiedSampling {
// Spark configuration for the stratified-sampling job.
// FIX: the property for excluding transitive dependencies pulled in via
// "spark.jars.packages" is "spark.jars.excludes" (plural). The original
// key "spark.jars.exclude" is not a recognized Spark configuration
// property and was silently ignored, so the netty exclusion never
// took effect.
val conf = new SparkConf().setAppName("pcs_sampling")
  .set("spark.jars.packages", "io.netty:netty-common:4.1.8.Final")
  .set("spark.jars.excludes", "io.netty:netty-common")
//.setMaster("local")  // uncomment to run locally instead of on a cluster
// NOTE: as object-level vals, conf/sc are initialized lazily on first
// access to the enclosing object (i.e., when main is invoked).
val sc = new SparkContext(conf)
def main(args: Array[String]): Unit = {
//val pcs = sc.textFile("src/main/resources/part-00000")
//val pcs = sc.textFile("hdfs://xxxx:8020/data1/Data/NewPCSData")
val pcs = sc.textFile(args(0))
//设定抽样格式 double类型变