spark-scala transformation: union, join, distinct

import org.apache.spark._
import org.apache.spark.network.netty.SparkTransportConf

object Transformation {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Transformation1").setMaster("local")
    val spark = new SparkContext(conf)
    // union: concatenates the two RDDs without removing duplicates
    val a1 = spark.parallelize(List(('a', 1), ('b', 1)))
    val a2 = spark.parallelize(List(('c', 1), ('d', 1)))
    val result = a1.union(a2)
    println(result.count())           // 4
    println(result.collect().length)  // 4
    for (i <- 0 until result.collect().length) (result.collect())(i) // re-collects on every iteration and prints nothing; see the note after this block
    for ((i, j) <- result.collect()) println(i + ":" + j)

    // distinct: drops duplicate tuples, comparing the whole (key, value) pair
    val a3 = spark.parallelize(List(('a', 1), ('b', 1), ('a', 1)))
    val a4 = spark.parallelize(List(('c', 1), ('d', 1), ('b', 1), ('b', 2), ('b', 3), ('a', 1), ('a', 2)))
    val r2 = a3.distinct()

    for (i <- r2) {
      println(i)
    }
//    for((i,j)<-a3) println("a3:"+i+":"+j)
//    for((i,j)<-r2) println("r2:"+i+":"+j)


    // join: inner join on keys, one output tuple per matching left/right combination
    val j1 = a3.join(a4)

    for (i <- j1) {
      println(i)
    }
//    for((i,(j,k))<-j1) println("j1:"+i+":"+j+":"+k)
  }
}
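
A few notes on what the program above does, followed by a minimal sketch. The sketch assumes the same local-mode SparkContext and the a1/a2/a3/a4 RDDs defined above; the local names unioned, deduped and joined are only for illustration.

- The loop over result.collect().length calls collect() again on every iteration and never prints anything, which is why the log below contains several separate "collect at Transformation.scala:14" jobs; collecting once into a local array avoids the extra jobs.
- distinct() compares the whole (key, value) tuple, so the two ('a',1) entries in a3 collapse into one, while ('b',1), ('b',2) and ('b',3) would all survive.
- join() is an inner join on the key and emits one output tuple per matching left/right pair: key 'a' has 2 values on the left and 2 on the right (4 tuples), key 'b' has 1 and 3 (3 tuples), and 'c'/'d' appear on only one side and are dropped, giving the 7 tuples seen in the output.

// Sketch: collect each result once and reuse the local array
val unioned = a1.union(a2).collect()        // 4 tuples, duplicates kept
println(unioned.mkString(", "))

val deduped = a3.distinct().collect()       // 2 tuples: (a,1), (b,1)
println(deduped.mkString(", "))

val joined = a3.join(a4).collect()          // 2*2 + 1*3 = 7 tuples; keys present on only one side are dropped
joined.foreach(println)

// a set-style union (duplicates removed) would be a1.union(a2).distinct()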

Output:


"/Applications/IntelliJ IDEA.app/Contents/jbr/Contents/Home/bin/java" "-javaagent:/Applications/IntelliJ IDEA.app/Contents/lib/idea_rt.jar=58444:/Applications/IntelliJ IDEA.app/Contents/bin" -Dfile.encoding=UTF-8 -classpath /Users/andrew/IdeaProjects/scala_from_scratch/target/classes:/Users/andrew/.ivy2/cache/org.scala-lang/scala-reflect/jars/scala-reflect-2.12.10.jar:/Users/andrew/.ivy2/cache/org.scala-lang/scala-library/jars/scala-library-2.12.10.jar:/Users/andrew/.m2/repository/org/apache/spark/spark-core_2.12/3.0.0/spark-core_2.12-3.0.0.jar:/Users/andrew/.m2/repository/com/thoughtworks/paranamer/paranamer/2.8/paranamer-2.8.jar:/Users/andrew/.m2/repository/org/apache/avro/avro/1.8.2/avro-1.8.2.jar:/Users/andrew/.m2/repository/org/codehaus/jackson/jackson-core-asl/1.9.13/jackson-core-asl-1.9.13.jar:/Users/andrew/.m2/repository/org/codehaus/jackson/jackson-mapper-asl/1.9.13/jackson-mapper-asl-1.9.13.jar:/Users/andrew/.m2/repository/org/apache/commons/commons-compress/1.8.1/commons-compress-1.8.1.jar:/Users/andrew/.m2/repository/org/tukaani/xz/1.5/xz-1.5.jar:/Users/andrew/.m2/repository/org/apache/avro/avro-mapred/1.8.2/avro-mapred-1.8.2-hadoop2.jar:/Users/andrew/.m2/repository/org/apache/avro/avro-ipc/1.8.2/avro-ipc-1.8.2.jar:/Users/andrew/.m2/repository/commons-codec/commons-codec/1.9/commons-codec-1.9.jar:/Users/andrew/.m2/repository/com/twitter/chill_2.12/0.9.5/chill_2.12-0.9.5.jar:/Users/andrew/.m2/repository/com/esotericsoftware/kryo-shaded/4.0.2/kryo-shaded-4.0.2.jar:/Users/andrew/.m2/repository/com/esotericsoftware/minlog/1.3.0/minlog-1.3.0.jar:/Users/andrew/.m2/repository/org/objenesis/objenesis/2.5.1/objenesis-2.5.1.jar:/Users/andrew/.m2/repository/com/twitter/chill-java/0.9.5/chill-java-0.9.5.jar:/Users/andrew/.m2/repository/org/apache/xbean/xbean-asm7-shaded/4.15/xbean-asm7-shaded-4.15.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-client/2.7.4/hadoop-client-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-common/2.7.4/hadoop-common-2.7.4.jar:/Users/andrew/.m2/repository/commons-cli/commons-cli/1.2/commons-cli-1.2.jar:/Users/andrew/.m2/repository/xmlenc/xmlenc/0.52/xmlenc-0.52.jar:/Users/andrew/.m2/repository/commons-httpclient/commons-httpclient/3.1/commons-httpclient-3.1.jar:/Users/andrew/.m2/repository/commons-io/commons-io/2.4/commons-io-2.4.jar:/Users/andrew/.m2/repository/commons-collections/commons-collections/3.2.2/commons-collections-3.2.2.jar:/Users/andrew/.m2/repository/org/mortbay/jetty/jetty-sslengine/6.1.26/jetty-sslengine-6.1.26.jar:/Users/andrew/.m2/repository/javax/servlet/jsp/jsp-api/2.1/jsp-api-2.1.jar:/Users/andrew/.m2/repository/commons-lang/commons-lang/2.6/commons-lang-2.6.jar:/Users/andrew/.m2/repository/commons-configuration/commons-configuration/1.6/commons-configuration-1.6.jar:/Users/andrew/.m2/repository/commons-digester/commons-digester/1.8/commons-digester-1.8.jar:/Users/andrew/.m2/repository/commons-beanutils/commons-beanutils/1.7.0/commons-beanutils-1.7.0.jar:/Users/andrew/.m2/repository/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar:/Users/andrew/.m2/repository/com/google/code/gson/gson/2.2.4/gson-2.2.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-auth/2.7.4/hadoop-auth-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/httpcomponents/httpclient/4.2.5/httpclient-4.2.5.jar:/Users/andrew/.m2/repository/org/apache/httpcomponents/httpcore/4.2.4/httpcore-4.2.4.jar:/Users/andrew/.m2/repository/org/apache/directory/server/apacheds-kerberos-codec/2.0.0-M15/apacheds-kerberos-codec-2.0.0-M15.jar:/User
s/andrew/.m2/repository/org/apache/directory/server/apacheds-i18n/2.0.0-M15/apacheds-i18n-2.0.0-M15.jar:/Users/andrew/.m2/repository/org/apache/directory/api/api-asn1-api/1.0.0-M20/api-asn1-api-1.0.0-M20.jar:/Users/andrew/.m2/repository/org/apache/directory/api/api-util/1.0.0-M20/api-util-1.0.0-M20.jar:/Users/andrew/.m2/repository/org/apache/curator/curator-client/2.7.1/curator-client-2.7.1.jar:/Users/andrew/.m2/repository/org/apache/htrace/htrace-core/3.1.0-incubating/htrace-core-3.1.0-incubating.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-hdfs/2.7.4/hadoop-hdfs-2.7.4.jar:/Users/andrew/.m2/repository/org/mortbay/jetty/jetty-util/6.1.26/jetty-util-6.1.26.jar:/Users/andrew/.m2/repository/xerces/xercesImpl/2.9.1/xercesImpl-2.9.1.jar:/Users/andrew/.m2/repository/xml-apis/xml-apis/1.3.04/xml-apis-1.3.04.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-app/2.7.4/hadoop-mapreduce-client-app-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-common/2.7.4/hadoop-mapreduce-client-common-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-yarn-client/2.7.4/hadoop-yarn-client-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-yarn-server-common/2.7.4/hadoop-yarn-server-common-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-shuffle/2.7.4/hadoop-mapreduce-client-shuffle-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-yarn-api/2.7.4/hadoop-yarn-api-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-core/2.7.4/hadoop-mapreduce-client-core-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-yarn-common/2.7.4/hadoop-yarn-common-2.7.4.jar:/Users/andrew/.m2/repository/javax/xml/bind/jaxb-api/2.2.2/jaxb-api-2.2.2.jar:/Users/andrew/.m2/repository/javax/xml/stream/stax-api/1.0-2/stax-api-1.0-2.jar:/Users/andrew/.m2/repository/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13.jar:/Users/andrew/.m2/repository/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-jobclient/2.7.4/hadoop-mapreduce-client-jobclient-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/hadoop/hadoop-annotations/2.7.4/hadoop-annotations-2.7.4.jar:/Users/andrew/.m2/repository/org/apache/spark/spark-launcher_2.12/3.0.0/spark-launcher_2.12-3.0.0.jar:/Users/andrew/.m2/repository/org/apache/spark/spark-kvstore_2.12/3.0.0/spark-kvstore_2.12-3.0.0.jar:/Users/andrew/.m2/repository/org/fusesource/leveldbjni/leveldbjni-all/1.8/leveldbjni-all-1.8.jar:/Users/andrew/.m2/repository/com/fasterxml/jackson/core/jackson-core/2.10.0/jackson-core-2.10.0.jar:/Users/andrew/.m2/repository/com/fasterxml/jackson/core/jackson-annotations/2.10.0/jackson-annotations-2.10.0.jar:/Users/andrew/.m2/repository/org/apache/spark/spark-network-common_2.12/3.0.0/spark-network-common_2.12-3.0.0.jar:/Users/andrew/.m2/repository/org/apache/spark/spark-network-shuffle_2.12/3.0.0/spark-network-shuffle_2.12-3.0.0.jar:/Users/andrew/.m2/repository/org/apache/spark/spark-unsafe_2.12/3.0.0/spark-unsafe_2.12-3.0.0.jar:/Users/andrew/.m2/repository/javax/activation/activation/1.1.1/activation-1.1.1.jar:/Users/andrew/.m2/repository/org/apache/curator/curator-recipes/2.7.1/curator-recipes-2.7.1.jar:/Users/andrew/.m2/repository/org/apache/curator/curator-framework/2.7.1/curator-framework-2.7.1.jar:/Users/andrew/.m2/repository/com/google/guava/guava/16.0.1/guava-16.0.1.jar:/Users/andrew/.m2/repository/org/apache/zookeeper
/zookeeper/3.4.14/zookeeper-3.4.14.jar:/Users/andrew/.m2/repository/org/apache/yetus/audience-annotations/0.5.0/audience-annotations-0.5.0.jar:/Users/andrew/.m2/repository/javax/servlet/javax.servlet-api/3.1.0/javax.servlet-api-3.1.0.jar:/Users/andrew/.m2/repository/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar:/Users/andrew/.m2/repository/org/apache/commons/commons-math3/3.4.1/commons-math3-3.4.1.jar:/Users/andrew/.m2/repository/org/apache/commons/commons-text/1.6/commons-text-1.6.jar:/Users/andrew/.m2/repository/com/google/code/findbugs/jsr305/3.0.0/jsr305-3.0.0.jar:/Users/andrew/.m2/repository/org/slf4j/slf4j-api/1.7.30/slf4j-api-1.7.30.jar:/Users/andrew/.m2/repository/org/slf4j/jul-to-slf4j/1.7.30/jul-to-slf4j-1.7.30.jar:/Users/andrew/.m2/repository/org/slf4j/jcl-over-slf4j/1.7.30/jcl-over-slf4j-1.7.30.jar:/Users/andrew/.m2/repository/log4j/log4j/1.2.17/log4j-1.2.17.jar:/Users/andrew/.m2/repository/org/slf4j/slf4j-log4j12/1.7.30/slf4j-log4j12-1.7.30.jar:/Users/andrew/.m2/repository/com/ning/compress-lzf/1.0.3/compress-lzf-1.0.3.jar:/Users/andrew/.m2/repository/org/xerial/snappy/snappy-java/1.1.7.5/snappy-java-1.1.7.5.jar:/Users/andrew/.m2/repository/org/lz4/lz4-java/1.7.1/lz4-java-1.7.1.jar:/Users/andrew/.m2/repository/com/github/luben/zstd-jni/1.4.4-3/zstd-jni-1.4.4-3.jar:/Users/andrew/.m2/repository/org/roaringbitmap/RoaringBitmap/0.7.45/RoaringBitmap-0.7.45.jar:/Users/andrew/.m2/repository/org/roaringbitmap/shims/0.7.45/shims-0.7.45.jar:/Users/andrew/.m2/repository/commons-net/commons-net/3.1/commons-net-3.1.jar:/Users/andrew/.m2/repository/org/scala-lang/modules/scala-xml_2.12/1.2.0/scala-xml_2.12-1.2.0.jar:/Users/andrew/.m2/repository/org/scala-lang/scala-library/2.12.10/scala-library-2.12.10.jar:/Users/andrew/.m2/repository/org/scala-lang/scala-reflect/2.12.10/scala-reflect-2.12.10.jar:/Users/andrew/.m2/repository/org/json4s/json4s-jackson_2.12/3.6.6/json4s-jackson_2.12-3.6.6.jar:/Users/andrew/.m2/repository/org/json4s/json4s-core_2.12/3.6.6/json4s-core_2.12-3.6.6.jar:/Users/andrew/.m2/repository/org/json4s/json4s-ast_2.12/3.6.6/json4s-ast_2.12-3.6.6.jar:/Users/andrew/.m2/repository/org/json4s/json4s-scalap_2.12/3.6.6/json4s-scalap_2.12-3.6.6.jar:/Users/andrew/.m2/repository/org/glassfish/jersey/core/jersey-client/2.30/jersey-client-2.30.jar:/Users/andrew/.m2/repository/jakarta/ws/rs/jakarta.ws.rs-api/2.1.6/jakarta.ws.rs-api-2.1.6.jar:/Users/andrew/.m2/repository/org/glassfish/hk2/external/jakarta.inject/2.6.1/jakarta.inject-2.6.1.jar:/Users/andrew/.m2/repository/org/glassfish/jersey/core/jersey-common/2.30/jersey-common-2.30.jar:/Users/andrew/.m2/repository/jakarta/annotation/jakarta.annotation-api/1.3.5/jakarta.annotation-api-1.3.5.jar:/Users/andrew/.m2/repository/org/glassfish/hk2/osgi-resource-locator/1.0.3/osgi-resource-locator-1.0.3.jar:/Users/andrew/.m2/repository/org/glassfish/jersey/core/jersey-server/2.30/jersey-server-2.30.jar:/Users/andrew/.m2/repository/org/glassfish/jersey/media/jersey-media-jaxb/2.30/jersey-media-jaxb-2.30.jar:/Users/andrew/.m2/repository/jakarta/validation/jakarta.validation-api/2.0.2/jakarta.validation-api-2.0.2.jar:/Users/andrew/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet/2.30/jersey-container-servlet-2.30.jar:/Users/andrew/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet-core/2.30/jersey-container-servlet-core-2.30.jar:/Users/andrew/.m2/repository/org/glassfish/jersey/inject/jersey-hk2/2.30/jersey-hk2-2.30.jar:/Users/andrew/.m2/repository/org/glassfish/hk2/hk2-locator/2.6.1/hk2-loca
tor-2.6.1.jar:/Users/andrew/.m2/repository/org/glassfish/hk2/external/aopalliance-repackaged/2.6.1/aopalliance-repackaged-2.6.1.jar:/Users/andrew/.m2/repository/org/glassfish/hk2/hk2-api/2.6.1/hk2-api-2.6.1.jar:/Users/andrew/.m2/repository/org/glassfish/hk2/hk2-utils/2.6.1/hk2-utils-2.6.1.jar:/Users/andrew/.m2/repository/org/javassist/javassist/3.25.0-GA/javassist-3.25.0-GA.jar:/Users/andrew/.m2/repository/io/netty/netty-all/4.1.47.Final/netty-all-4.1.47.Final.jar:/Users/andrew/.m2/repository/com/clearspring/analytics/stream/2.9.6/stream-2.9.6.jar:/Users/andrew/.m2/repository/io/dropwizard/metrics/metrics-core/4.1.1/metrics-core-4.1.1.jar:/Users/andrew/.m2/repository/io/dropwizard/metrics/metrics-jvm/4.1.1/metrics-jvm-4.1.1.jar:/Users/andrew/.m2/repository/io/dropwizard/metrics/metrics-json/4.1.1/metrics-json-4.1.1.jar:/Users/andrew/.m2/repository/io/dropwizard/metrics/metrics-graphite/4.1.1/metrics-graphite-4.1.1.jar:/Users/andrew/.m2/repository/io/dropwizard/metrics/metrics-jmx/4.1.1/metrics-jmx-4.1.1.jar:/Users/andrew/.m2/repository/com/fasterxml/jackson/core/jackson-databind/2.10.0/jackson-databind-2.10.0.jar:/Users/andrew/.m2/repository/com/fasterxml/jackson/module/jackson-module-scala_2.12/2.10.0/jackson-module-scala_2.12-2.10.0.jar:/Users/andrew/.m2/repository/com/fasterxml/jackson/module/jackson-module-paranamer/2.10.0/jackson-module-paranamer-2.10.0.jar:/Users/andrew/.m2/repository/org/apache/ivy/ivy/2.4.0/ivy-2.4.0.jar:/Users/andrew/.m2/repository/oro/oro/2.0.8/oro-2.0.8.jar:/Users/andrew/.m2/repository/net/razorvine/pyrolite/4.30/pyrolite-4.30.jar:/Users/andrew/.m2/repository/net/sf/py4j/py4j/0.10.9/py4j-0.10.9.jar:/Users/andrew/.m2/repository/org/apache/spark/spark-tags_2.12/3.0.0/spark-tags_2.12-3.0.0.jar:/Users/andrew/.m2/repository/org/apache/commons/commons-crypto/1.0.0/commons-crypto-1.0.0.jar:/Users/andrew/.m2/repository/org/spark-project/spark/unused/1.0.0/unused-1.0.0.jar Transformation
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/Users/andrew/.m2/repository/org/apache/spark/spark-unsafe_2.12/3.0.0/spark-unsafe_2.12-3.0.0.jar) to constructor java.nio.DirectByteBuffer(long,int)
WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
20/09/01 00:37:42 INFO SparkContext: Running Spark version 3.0.0
20/09/01 00:37:42 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
20/09/01 00:37:42 INFO ResourceUtils: ==============================================================
20/09/01 00:37:42 INFO ResourceUtils: Resources for spark.driver:

20/09/01 00:37:42 INFO ResourceUtils: ==============================================================
20/09/01 00:37:42 INFO SparkContext: Submitted application: Transformation1
20/09/01 00:37:42 INFO SecurityManager: Changing view acls to: andrew
20/09/01 00:37:42 INFO SecurityManager: Changing modify acls to: andrew
20/09/01 00:37:42 INFO SecurityManager: Changing view acls groups to: 
20/09/01 00:37:42 INFO SecurityManager: Changing modify acls groups to: 
20/09/01 00:37:42 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users  with view permissions: Set(andrew); groups with view permissions: Set(); users  with modify permissions: Set(andrew); groups with modify permissions: Set()
20/09/01 00:37:43 INFO Utils: Successfully started service 'sparkDriver' on port 58461.
20/09/01 00:37:43 INFO SparkEnv: Registering MapOutputTracker
20/09/01 00:37:43 INFO SparkEnv: Registering BlockManagerMaster
20/09/01 00:37:43 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
20/09/01 00:37:43 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
20/09/01 00:37:43 INFO SparkEnv: Registering BlockManagerMasterHeartbeat
20/09/01 00:37:43 INFO DiskBlockManager: Created local directory at /private/var/folders/s8/_5116jwj03l5sdj__s1jv4pr0000gn/T/blockmgr-8f21abfe-add2-4c03-862b-e8b4afa37473
20/09/01 00:37:43 INFO MemoryStore: MemoryStore started with capacity 9.4 GiB
20/09/01 00:37:43 INFO SparkEnv: Registering OutputCommitCoordinator
20/09/01 00:37:43 INFO Utils: Successfully started service 'SparkUI' on port 4040.
20/09/01 00:37:43 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://imac-pro:4040
20/09/01 00:37:43 INFO Executor: Starting executor ID driver on host imac-pro
20/09/01 00:37:43 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 58462.
20/09/01 00:37:43 INFO NettyBlockTransferService: Server created on imac-pro:58462
20/09/01 00:37:43 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
20/09/01 00:37:43 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, imac-pro, 58462, None)
20/09/01 00:37:43 INFO BlockManagerMasterEndpoint: Registering block manager imac-pro:58462 with 9.4 GiB RAM, BlockManagerId(driver, imac-pro, 58462, None)
20/09/01 00:37:43 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, imac-pro, 58462, None)
20/09/01 00:37:43 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, imac-pro, 58462, None)
20/09/01 00:37:44 INFO SparkContext: Starting job: count at Transformation.scala:12
20/09/01 00:37:44 INFO DAGScheduler: Got job 0 (count at Transformation.scala:12) with 2 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 0 (count at Transformation.scala:12)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List()
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 0 (UnionRDD[2] at union at Transformation.scala:11), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 2.5 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 1615.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on imac-pro:58462 (size: 1615.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 0 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 0 (UnionRDD[2] at union at Transformation.scala:11) (first 15 tasks are for partitions Vector(0, 1))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 0.0 with 2 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 875 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, imac-pro, executor driver, partition 1, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 1.0 in stage 0.0 (TID 1)
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 79 ms on imac-pro (executor driver) (1/2)
20/09/01 00:37:44 INFO Executor: Finished task 1.0 in stage 0.0 (TID 1). 875 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 9 ms on imac-pro (executor driver) (2/2)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ResultStage 0 (count at Transformation.scala:12) finished in 0.347 s
20/09/01 00:37:44 INFO DAGScheduler: Job 0 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:44 INFO TaskSchedulerImpl: Killing all running tasks in stage 0: Stage finished
20/09/01 00:37:44 INFO DAGScheduler: Job 0 finished: count at Transformation.scala:12, took 0.381521 s
4
20/09/01 00:37:44 INFO SparkContext: Starting job: collect at Transformation.scala:13
20/09/01 00:37:44 INFO DAGScheduler: Got job 1 (collect at Transformation.scala:13) with 2 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 1 (collect at Transformation.scala:13)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List()
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 1 (UnionRDD[2] at union at Transformation.scala:11), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 2.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 1614.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on imac-pro:58462 (size: 1614.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 1 (UnionRDD[2] at union at Transformation.scala:11) (first 15 tasks are for partitions Vector(0, 1))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 1.0 with 2 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 2, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 1.0 (TID 2)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 1.0 (TID 2). 979 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Starting task 1.0 in stage 1.0 (TID 3, imac-pro, executor driver, partition 1, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 1.0 in stage 1.0 (TID 3)
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 2) in 8 ms on imac-pro (executor driver) (1/2)
20/09/01 00:37:44 INFO Executor: Finished task 1.0 in stage 1.0 (TID 3). 979 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 1.0 in stage 1.0 (TID 3) in 6 ms on imac-pro (executor driver) (2/2)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ResultStage 1 (collect at Transformation.scala:13) finished in 0.020 s
20/09/01 00:37:44 INFO DAGScheduler: Job 1 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:44 INFO TaskSchedulerImpl: Killing all running tasks in stage 1: Stage finished
20/09/01 00:37:44 INFO DAGScheduler: Job 1 finished: collect at Transformation.scala:13, took 0.023857 s
4
20/09/01 00:37:44 INFO SparkContext: Starting job: collect at Transformation.scala:14
20/09/01 00:37:44 INFO DAGScheduler: Got job 2 (collect at Transformation.scala:14) with 2 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 2 (collect at Transformation.scala:14)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List()
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 2 (UnionRDD[2] at union at Transformation.scala:11), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 2.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 1614.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on imac-pro:58462 (size: 1614.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 2 (UnionRDD[2] at union at Transformation.scala:11) (first 15 tasks are for partitions Vector(0, 1))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 2.0 with 2 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 2.0 (TID 4, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 2.0 (TID 4)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 2.0 (TID 4). 979 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Starting task 1.0 in stage 2.0 (TID 5, imac-pro, executor driver, partition 1, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 1.0 in stage 2.0 (TID 5)
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 4) in 7 ms on imac-pro (executor driver) (1/2)
20/09/01 00:37:44 INFO Executor: Finished task 1.0 in stage 2.0 (TID 5). 936 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 1.0 in stage 2.0 (TID 5) in 4 ms on imac-pro (executor driver) (2/2)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ResultStage 2 (collect at Transformation.scala:14) finished in 0.020 s
20/09/01 00:37:44 INFO DAGScheduler: Job 2 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:44 INFO TaskSchedulerImpl: Killing all running tasks in stage 2: Stage finished
20/09/01 00:37:44 INFO DAGScheduler: Job 2 finished: collect at Transformation.scala:14, took 0.023944 s
20/09/01 00:37:44 INFO SparkContext: Starting job: collect at Transformation.scala:14
20/09/01 00:37:44 INFO DAGScheduler: Got job 3 (collect at Transformation.scala:14) with 2 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 3 (collect at Transformation.scala:14)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List()
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 3 (UnionRDD[2] at union at Transformation.scala:11), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 2.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 1614.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on imac-pro:58462 (size: 1614.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 3 (UnionRDD[2] at union at Transformation.scala:11) (first 15 tasks are for partitions Vector(0, 1))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 3.0 with 2 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 3.0 (TID 6, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 3.0 (TID 6)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 3.0 (TID 6). 979 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Starting task 1.0 in stage 3.0 (TID 7, imac-pro, executor driver, partition 1, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 3.0 (TID 6) in 7 ms on imac-pro (executor driver) (1/2)
20/09/01 00:37:44 INFO Executor: Running task 1.0 in stage 3.0 (TID 7)
20/09/01 00:37:44 INFO Executor: Finished task 1.0 in stage 3.0 (TID 7). 979 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 1.0 in stage 3.0 (TID 7) in 6 ms on imac-pro (executor driver) (2/2)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ResultStage 3 (collect at Transformation.scala:14) finished in 0.023 s
20/09/01 00:37:44 INFO DAGScheduler: Job 3 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:44 INFO TaskSchedulerImpl: Killing all running tasks in stage 3: Stage finished
20/09/01 00:37:44 INFO DAGScheduler: Job 3 finished: collect at Transformation.scala:14, took 0.025578 s
20/09/01 00:37:44 INFO SparkContext: Starting job: collect at Transformation.scala:14
20/09/01 00:37:44 INFO DAGScheduler: Got job 4 (collect at Transformation.scala:14) with 2 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 4 (collect at Transformation.scala:14)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List()
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 4 (UnionRDD[2] at union at Transformation.scala:11), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 2.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 1614.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_4_piece0 in memory on imac-pro:58462 (size: 1614.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 4 (UnionRDD[2] at union at Transformation.scala:11) (first 15 tasks are for partitions Vector(0, 1))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 4.0 with 2 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 4.0 (TID 8, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 4.0 (TID 8)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 4.0 (TID 8). 979 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Starting task 1.0 in stage 4.0 (TID 9, imac-pro, executor driver, partition 1, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 4.0 (TID 8) in 6 ms on imac-pro (executor driver) (1/2)
20/09/01 00:37:44 INFO Executor: Running task 1.0 in stage 4.0 (TID 9)
20/09/01 00:37:44 INFO Executor: Finished task 1.0 in stage 4.0 (TID 9). 936 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 1.0 in stage 4.0 (TID 9) in 4 ms on imac-pro (executor driver) (2/2)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ResultStage 4 (collect at Transformation.scala:14) finished in 0.018 s
20/09/01 00:37:44 INFO DAGScheduler: Job 4 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:44 INFO TaskSchedulerImpl: Killing all running tasks in stage 4: Stage finished
20/09/01 00:37:44 INFO DAGScheduler: Job 4 finished: collect at Transformation.scala:14, took 0.021573 s
20/09/01 00:37:44 INFO SparkContext: Starting job: collect at Transformation.scala:14
20/09/01 00:37:44 INFO DAGScheduler: Got job 5 (collect at Transformation.scala:14) with 2 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 5 (collect at Transformation.scala:14)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List()
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 5 (UnionRDD[2] at union at Transformation.scala:11), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_5 stored as values in memory (estimated size 2.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 1614.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on imac-pro:58462 (size: 1614.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 5 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 5 (UnionRDD[2] at union at Transformation.scala:11) (first 15 tasks are for partitions Vector(0, 1))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 5.0 with 2 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 5.0 (TID 10, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 5.0 (TID 10)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 5.0 (TID 10). 936 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Starting task 1.0 in stage 5.0 (TID 11, imac-pro, executor driver, partition 1, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 1.0 in stage 5.0 (TID 11)
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 5.0 (TID 10) in 5 ms on imac-pro (executor driver) (1/2)
20/09/01 00:37:44 INFO Executor: Finished task 1.0 in stage 5.0 (TID 11). 893 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 1.0 in stage 5.0 (TID 11) in 3 ms on imac-pro (executor driver) (2/2)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ResultStage 5 (collect at Transformation.scala:14) finished in 0.016 s
20/09/01 00:37:44 INFO DAGScheduler: Job 5 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:44 INFO TaskSchedulerImpl: Killing all running tasks in stage 5: Stage finished
20/09/01 00:37:44 INFO DAGScheduler: Job 5 finished: collect at Transformation.scala:14, took 0.018449 s
20/09/01 00:37:44 INFO SparkContext: Starting job: collect at Transformation.scala:14
20/09/01 00:37:44 INFO DAGScheduler: Got job 6 (collect at Transformation.scala:14) with 2 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 6 (collect at Transformation.scala:14)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List()
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 6 (UnionRDD[2] at union at Transformation.scala:11), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_6 stored as values in memory (estimated size 2.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 1614.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_6_piece0 in memory on imac-pro:58462 (size: 1614.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 6 (UnionRDD[2] at union at Transformation.scala:11) (first 15 tasks are for partitions Vector(0, 1))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 6.0 with 2 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 6.0 (TID 12, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 6.0 (TID 12)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 6.0 (TID 12). 979 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Starting task 1.0 in stage 6.0 (TID 13, imac-pro, executor driver, partition 1, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 1.0 in stage 6.0 (TID 13)
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 6.0 (TID 12) in 5 ms on imac-pro (executor driver) (1/2)
20/09/01 00:37:44 INFO Executor: Finished task 1.0 in stage 6.0 (TID 13). 893 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 1.0 in stage 6.0 (TID 13) in 4 ms on imac-pro (executor driver) (2/2)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 6.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ResultStage 6 (collect at Transformation.scala:14) finished in 0.015 s
20/09/01 00:37:44 INFO DAGScheduler: Job 6 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:44 INFO TaskSchedulerImpl: Killing all running tasks in stage 6: Stage finished
20/09/01 00:37:44 INFO DAGScheduler: Job 6 finished: collect at Transformation.scala:14, took 0.019002 s
20/09/01 00:37:44 INFO SparkContext: Starting job: collect at Transformation.scala:15
20/09/01 00:37:44 INFO DAGScheduler: Got job 7 (collect at Transformation.scala:15) with 2 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 7 (collect at Transformation.scala:15)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List()
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 7 (UnionRDD[2] at union at Transformation.scala:11), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_7 stored as values in memory (estimated size 2.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 1614.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_7_piece0 in memory on imac-pro:58462 (size: 1614.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 7 (UnionRDD[2] at union at Transformation.scala:11) (first 15 tasks are for partitions Vector(0, 1))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 7.0 with 2 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 7.0 (TID 14, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 7.0 (TID 14)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 7.0 (TID 14). 936 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Starting task 1.0 in stage 7.0 (TID 15, imac-pro, executor driver, partition 1, PROCESS_LOCAL, 7584 bytes)
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 7.0 (TID 14) in 4 ms on imac-pro (executor driver) (1/2)
20/09/01 00:37:44 INFO Executor: Running task 1.0 in stage 7.0 (TID 15)
20/09/01 00:37:44 INFO Executor: Finished task 1.0 in stage 7.0 (TID 15). 936 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 1.0 in stage 7.0 (TID 15) in 5 ms on imac-pro (executor driver) (2/2)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 7.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ResultStage 7 (collect at Transformation.scala:15) finished in 0.015 s
20/09/01 00:37:44 INFO DAGScheduler: Job 7 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:44 INFO TaskSchedulerImpl: Killing all running tasks in stage 7: Stage finished
20/09/01 00:37:44 INFO DAGScheduler: Job 7 finished: collect at Transformation.scala:15, took 0.017839 s
a:1
b:1
c:1
d:1
20/09/01 00:37:44 INFO SparkContext: Starting job: foreach at Transformation.scala:22
20/09/01 00:37:44 INFO DAGScheduler: Registering RDD 5 (distinct at Transformation.scala:20) as input to shuffle 0
20/09/01 00:37:44 INFO DAGScheduler: Got job 8 (foreach at Transformation.scala:22) with 1 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 9 (foreach at Transformation.scala:22)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 8)
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 8)
20/09/01 00:37:44 INFO DAGScheduler: Submitting ShuffleMapStage 8 (MapPartitionsRDD[5] at distinct at Transformation.scala:20), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_8 stored as values in memory (estimated size 4.7 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 2.7 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_8_piece0 in memory on imac-pro:58462 (size: 2.7 KiB, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 8 (MapPartitionsRDD[5] at distinct at Transformation.scala:20) (first 15 tasks are for partitions Vector(0))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 8.0 with 1 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 8.0 (TID 16, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7478 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 8.0 (TID 16)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 8.0 (TID 16). 1157 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 8.0 (TID 16) in 44 ms on imac-pro (executor driver) (1/1)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 8.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ShuffleMapStage 8 (distinct at Transformation.scala:20) finished in 0.076 s
20/09/01 00:37:44 INFO DAGScheduler: looking for newly runnable stages
20/09/01 00:37:44 INFO DAGScheduler: running: Set()
20/09/01 00:37:44 INFO DAGScheduler: waiting: Set(ResultStage 9)
20/09/01 00:37:44 INFO DAGScheduler: failed: Set()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 9 (MapPartitionsRDD[7] at distinct at Transformation.scala:20), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_9 stored as values in memory (estimated size 4.8 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_9_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_9_piece0 in memory on imac-pro:58462 (size: 2.6 KiB, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 9 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 9 (MapPartitionsRDD[7] at distinct at Transformation.scala:20) (first 15 tasks are for partitions Vector(0))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 9.0 with 1 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 9.0 (TID 17, imac-pro, executor driver, partition 0, NODE_LOCAL, 7143 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 9.0 (TID 17)
20/09/01 00:37:44 INFO ShuffleBlockFetcherIterator: Getting 1 (189.0 B) non-empty blocks including 1 (189.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks
20/09/01 00:37:44 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 6 ms
(b,1)
(a,1)
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 9.0 (TID 17). 1267 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 9.0 (TID 17) in 44 ms on imac-pro (executor driver) (1/1)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 9.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ResultStage 9 (foreach at Transformation.scala:22) finished in 0.056 s
20/09/01 00:37:44 INFO DAGScheduler: Job 8 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:44 INFO TaskSchedulerImpl: Killing all running tasks in stage 9: Stage finished
20/09/01 00:37:44 INFO DAGScheduler: Job 8 finished: foreach at Transformation.scala:22, took 0.152104 s
20/09/01 00:37:44 INFO SparkContext: Starting job: foreach at Transformation.scala:32
20/09/01 00:37:44 INFO DAGScheduler: Registering RDD 3 (parallelize at Transformation.scala:18) as input to shuffle 1
20/09/01 00:37:44 INFO DAGScheduler: Registering RDD 4 (parallelize at Transformation.scala:19) as input to shuffle 2
20/09/01 00:37:44 INFO DAGScheduler: Got job 9 (foreach at Transformation.scala:32) with 1 output partitions
20/09/01 00:37:44 INFO DAGScheduler: Final stage: ResultStage 12 (foreach at Transformation.scala:32)
20/09/01 00:37:44 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 10, ShuffleMapStage 11)
20/09/01 00:37:44 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 10, ShuffleMapStage 11)
20/09/01 00:37:44 INFO DAGScheduler: Submitting ShuffleMapStage 10 (ParallelCollectionRDD[3] at parallelize at Transformation.scala:18), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_10 stored as values in memory (estimated size 2.9 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_10_piece0 stored as bytes in memory (estimated size 1890.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_10_piece0 in memory on imac-pro:58462 (size: 1890.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 10 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 10 (ParallelCollectionRDD[3] at parallelize at Transformation.scala:18) (first 15 tasks are for partitions Vector(0))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 10.0 with 1 tasks
20/09/01 00:37:44 INFO DAGScheduler: Submitting ShuffleMapStage 11 (ParallelCollectionRDD[4] at parallelize at Transformation.scala:19), which has no missing parents
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 10.0 (TID 18, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7478 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 10.0 (TID 18)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_11 stored as values in memory (estimated size 2.9 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_11_piece0 stored as bytes in memory (estimated size 1889.0 B, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_11_piece0 in memory on imac-pro:58462 (size: 1889.0 B, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 11 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 11 (ParallelCollectionRDD[4] at parallelize at Transformation.scala:19) (first 15 tasks are for partitions Vector(0))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 11.0 with 1 tasks
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 10.0 (TID 18). 1028 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 11.0 (TID 19, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7534 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 11.0 (TID 19)
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 10.0 (TID 18) in 16 ms on imac-pro (executor driver) (1/1)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 10.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ShuffleMapStage 10 (parallelize at Transformation.scala:18) finished in 0.023 s
20/09/01 00:37:44 INFO DAGScheduler: looking for newly runnable stages
20/09/01 00:37:44 INFO DAGScheduler: running: Set(ShuffleMapStage 11)
20/09/01 00:37:44 INFO DAGScheduler: waiting: Set(ResultStage 12)
20/09/01 00:37:44 INFO DAGScheduler: failed: Set()
20/09/01 00:37:44 INFO Executor: Finished task 0.0 in stage 11.0 (TID 19). 1028 bytes result sent to driver
20/09/01 00:37:44 INFO TaskSetManager: Finished task 0.0 in stage 11.0 (TID 19) in 8 ms on imac-pro (executor driver) (1/1)
20/09/01 00:37:44 INFO TaskSchedulerImpl: Removed TaskSet 11.0, whose tasks have all completed, from pool 
20/09/01 00:37:44 INFO DAGScheduler: ShuffleMapStage 11 (parallelize at Transformation.scala:19) finished in 0.023 s
20/09/01 00:37:44 INFO DAGScheduler: looking for newly runnable stages
20/09/01 00:37:44 INFO DAGScheduler: running: Set()
20/09/01 00:37:44 INFO DAGScheduler: waiting: Set(ResultStage 12)
20/09/01 00:37:44 INFO DAGScheduler: failed: Set()
20/09/01 00:37:44 INFO DAGScheduler: Submitting ResultStage 12 (MapPartitionsRDD[10] at join at Transformation.scala:30), which has no missing parents
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_12 stored as values in memory (estimated size 4.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO MemoryStore: Block broadcast_12_piece0 stored as bytes in memory (estimated size 2.6 KiB, free 9.4 GiB)
20/09/01 00:37:44 INFO BlockManagerInfo: Added broadcast_12_piece0 in memory on imac-pro:58462 (size: 2.6 KiB, free: 9.4 GiB)
20/09/01 00:37:44 INFO SparkContext: Created broadcast 12 from broadcast at DAGScheduler.scala:1200
20/09/01 00:37:44 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 12 (MapPartitionsRDD[10] at join at Transformation.scala:30) (first 15 tasks are for partitions Vector(0))
20/09/01 00:37:44 INFO TaskSchedulerImpl: Adding task set 12.0 with 1 tasks
20/09/01 00:37:44 INFO TaskSetManager: Starting task 0.0 in stage 12.0 (TID 20, imac-pro, executor driver, partition 0, PROCESS_LOCAL, 7206 bytes)
20/09/01 00:37:44 INFO Executor: Running task 0.0 in stage 12.0 (TID 20)
20/09/01 00:37:45 INFO ShuffleBlockFetcherIterator: Getting 1 (189.0 B) non-empty blocks including 1 (189.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks
20/09/01 00:37:45 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
20/09/01 00:37:45 INFO ShuffleBlockFetcherIterator: Getting 1 (228.0 B) non-empty blocks including 1 (228.0 B) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks
20/09/01 00:37:45 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
(a,(1,1))
(a,(1,2))
(a,(1,1))
(a,(1,2))
(b,(1,1))
(b,(1,2))
(b,(1,3))
20/09/01 00:37:45 INFO Executor: Finished task 0.0 in stage 12.0 (TID 20). 1267 bytes result sent to driver
20/09/01 00:37:45 INFO TaskSetManager: Finished task 0.0 in stage 12.0 (TID 20) in 22 ms on imac-pro (executor driver) (1/1)
20/09/01 00:37:45 INFO TaskSchedulerImpl: Removed TaskSet 12.0, whose tasks have all completed, from pool 
20/09/01 00:37:45 INFO DAGScheduler: ResultStage 12 (foreach at Transformation.scala:32) finished in 0.033 s
20/09/01 00:37:45 INFO DAGScheduler: Job 9 is finished. Cancelling potential speculative or zombie tasks for this job
20/09/01 00:37:45 INFO TaskSchedulerImpl: Killing all running tasks in stage 12: Stage finished
20/09/01 00:37:45 INFO DAGScheduler: Job 9 finished: foreach at Transformation.scala:32, took 0.068357 s
20/09/01 00:37:45 INFO SparkContext: Invoking stop() from shutdown hook
20/09/01 00:37:45 INFO SparkUI: Stopped Spark web UI at http://imac-pro:4040
20/09/01 00:37:45 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
20/09/01 00:37:45 INFO MemoryStore: MemoryStore cleared
20/09/01 00:37:45 INFO BlockManager: BlockManager stopped
20/09/01 00:37:45 INFO BlockManagerMaster: BlockManagerMaster stopped
20/09/01 00:37:45 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
20/09/01 00:37:45 INFO SparkContext: Successfully stopped SparkContext
20/09/01 00:37:45 INFO ShutdownHookManager: Shutdown hook called
20/09/01 00:37:45 INFO ShutdownHookManager: Deleting directory /private/var/folders/s8/_5116jwj03l5sdj__s1jv4pr0000gn/T/spark-589a6f0f-7c21-4e87-a4ea-033c9a08b667

Process finished with exit code 0
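
One last note: the program never calls stop() on the SparkContext, so the "Invoking stop() from shutdown hook" line near the end of the log comes from Spark's own shutdown hook. Stopping explicitly at the end of main is tidier; a one-line sketch (not in the original program):

spark.stop()   // shut down the SparkContext and release its resources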
