Code
package com.sparkStreaming.Demo10_HA

import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.DStream

/**
 * Created by Shi shuai RollerQing on 2019/11/16 14:23
 *
 * Spark Streaming: high availability (HA) for the Driver process
 */
object DriverHADemo {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession
      .builder()
      .appName(DriverHADemo.getClass.getSimpleName)
      // .master("local[*]") // not needed here -- the job is packaged and submitted to the cluster
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext

    val ck = "/spark-streaming/ck_ha"
    // val ck = "D:\\installs\\SparkStreamingTest\\data"

    // Recover the StreamingContext from the checkpoint if one exists,
    // otherwise build a fresh one with the factory function below
    val ssc: StreamingContext = StreamingContext.getOrCreate(ck, () => {
      val tmpSsc: StreamingContext = new StreamingContext(sc, Seconds(2))
      tmpSsc.checkpoint(ck)
      val ds: DStream[(String, Int)] = tmpSsc.socketTextStream("hadoop01", 8888, StorageLevel.MEMORY_ONLY)
        .flatMap(_.split("\\s+")) // \s matches whitespace (space, tab, newline); + means one or more
        .map((_, 1))
        .reduceByKey(_ + _)
      ds.print()
      tmpSsc
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
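The checkpoint path "/spark-streaming/ck_ha" has no scheme, so on the cluster it resolves against the default filesystem, i.e. HDFS (the draft notes at the end show the fully qualified form hdfs://qf/spark-streaming/ck_ha/). Once the job has been running for a while, a quick way to confirm that checkpoint data is actually being written, assuming you are on a node with the Hadoop client configured, is:

# list the checkpoint directory on HDFS
hdfs dfs -ls /spark-streaming/ck_ha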
Packaging
The pom file matters a lot here; the complete file is pasted at the end.
These are the parts I consider important (the packaging command itself is sketched after these two snippets):
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.2</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
<args>
<arg>-dependencyfile</arg>
<arg>${project.build.directory}/.scala_dependencies</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<plugin> <!-- application packaging -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration> <!-- exclude the following files from the jar: fixes packaging errors caused by duplicated, signed dependencies -->
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers> <!-- main-class entry point for the runnable jar -->
<!-- a jar built from IntelliJ IDEA could not find the hdfs filesystem at runtime: fix it by merging the FileSystem service files via the pom -->
<transformer
implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource>
</transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass></mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
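With the two plugins above in place, the shaded jar comes out of an ordinary Maven build. A minimal sketch of the packaging step (the artifact name SparkStreamingTest2-1.0-SNAPSHOT.jar follows from the full pom at the end; renaming it to something like ninthTest.jar before uploading is my own habit, not part of the build):

# build the shaded (fat) jar, skipping tests
mvn clean package -DskipTests
# the runnable jar ends up under target/, e.g. target/SparkStreamingTest2-1.0-SNAPSHOT.jar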
When I ran it I hit Exception in thread "main" java.io.IOException: No FileSystem for scheme: spark, which I never properly solved (that exception usually means a spark:// URL was handed to Hadoop somewhere a filesystem path was expected).
In the end it went away on its own, somewhat mysteriously.
Command
./bin/spark-submit \
--class com.sparkStreaming.Demo10_HA.DriverHADemo \
--master spark://hadoop01:7077 \
--deploy-mode cluster \
--supervise \
hdfs://hadoop01:8020/spark-streaming/submitjars/ninthTest.jar
The command above only ran correctly after quite a few revisions; the draft commands are pasted at the end.
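Two things need to be in place before submitting: the jar must exist at the HDFS path used above, and something must be listening on hadoop01:8888 for socketTextStream to connect to. A rough sketch of that preparation (the local jar path is my assumption; point it at wherever the shaded jar actually is):

# upload the shaded jar to the HDFS location referenced by spark-submit
hdfs dfs -mkdir -p /spark-streaming/submitjars
hdfs dfs -put -f target/SparkStreamingTest2-1.0-SNAPSHOT.jar hdfs://hadoop01:8020/spark-streaming/submitjars/ninthTest.jar

# on hadoop01: open the socket the job reads from, then type words into it
nc -lk 8888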
Result
Find the DriverWrapper process: it is running on the third node.
Go to hadoop03 and kill it, then check again. Hm? Where did it go?
Check the processes on all nodes:
It turns out that after being restarted it came back up on the second node.
(This screenshot doesn't look quite right.)
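For reference, this is roughly how I'd locate and kill the driver on a worker node (the PID is just an example):

# on hadoop03: find the DriverWrapper process started for the supervised driver
jps | grep DriverWrapper
# kill it by PID (example value)
kill -9 12345
# a few seconds later, jps on the other workers shows the Master has
# relaunched the driver -- here it reappeared on hadoop02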
Note
Because a driver submitted with --supervise cannot be stopped permanently with kill (the Master just restarts it),
you have to go to the Spark web UI on port 8080 and kill the application there.
The screenshot below shows the state after killing it.
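Alternatively, a driver running in standalone cluster mode can be killed from the command line; a sketch, with a made-up driver ID (the real one is shown on the Master UI at hadoop01:8080 and in the spark-submit output):

# kill the supervised driver by its ID so that it is not restarted
./bin/spark-class org.apache.spark.deploy.Client kill spark://hadoop01:7077 driver-20191116142300-0000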
Drafts
./bin/spark-submit \
--class com.sparkStreaming.Demo10_HA.DriverHADemo\
--master spark://hadoop01:7077 \
--deploy-mode cluster \
--jars /usr/local/hadoop-2.7.1/share/hadoop/hdfs/*.jar /usr/local/spark-2.2.0-bin-hadoop2.7/jars/*.jar /usr/local/hadoop-2.7.1/share/hadoop/common/*.jar \
--supervise \
/home/sparkjar/sixthTest.jar
./bin/spark-submit \
--class com.sparkStreaming.Demo10_HA.DriverHADemo\
--master spark://hadoop01:7077 \
--deploy-mode cluster \
--supervise \
hdfs://hadoop01:8020/spark-streaming/submitjars/thirdTest.jar
./bin/spark-submit \
--class com.sparkStreaming.Demo10_HA.DriverHADemo \
--master spark://hadoop01:7077 \
--deploy-mode cluster \
--supervise \
hdfs://hadoop01:8020/spark-streaming/submitjars/ninthTest.jar
/spark-streaming/submitjars/ninthTest.jar
hdfs://qf/spark-streaming/ck_ha/

The complete pom.xml:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>spark</groupId>
<artifactId>SparkStreamingTest2</artifactId>
<version>1.0-SNAPSHOT</version>
<inceptionYear>2008</inceptionYear>
<properties>
<scala.version>2.11.8</scala.version>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<encoding>UTF-8</encoding>
<spark.version>2.2.0</spark.version>
<hadoop.version>2.7.1</hadoop.version>
<scala.compat.version>2.11</scala.compat.version>
</properties>
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>${spark.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/org.scala-tools/maven-scala-plugin -->
<dependency>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.11</version>
</dependency>
</dependencies>
<build>
<sourceDirectory>src/main/scala</sourceDirectory>
<testSourceDirectory>src/test/scala</testSourceDirectory>
<plugins>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
<args>
<arg>-target:jvm-1.8</arg>
</args>
</configuration>
</plugin>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.2.2</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
<args>
<arg>-dependencyfile</arg>
<arg>${project.build.directory}/.scala_dependencies</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<plugin> <!-- application packaging -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration> <!-- exclude the following files from the jar: fixes packaging errors caused by duplicated, signed dependencies -->
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers> <!-- main-class entry point for the runnable jar -->
<!-- a jar built from IntelliJ IDEA could not find the hdfs filesystem at runtime: fix it by merging the FileSystem service files via the pom -->
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource>
</transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass></mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
<!-- <plugin>-->
<!-- <groupId>org.apache.maven.plugins</groupId>-->
<!-- <artifactId>maven-compiler-plugin</artifactId>-->
<!-- <version>2.3.2</version>-->
<!-- <configuration>-->
<!-- <encoding>UTF-8</encoding>-->
<!-- <source>1.8</source>-->
<!-- <target>1.8</target>-->
<!-- <showWarnings>true</showWarnings>-->
<!-- </configuration>-->
<!-- </plugin>-->
<!-- <plugin>-->
<!-- <artifactId>maven-assembly-plugin</artifactId>-->
<!-- <configuration>-->
<!-- <descriptorRefs>-->
<!-- <descriptorRef>jar-with-dependencies</descriptorRef>-->
<!-- </descriptorRefs>-->
<!-- <archive>-->
<!-- <manifest>-->
<!-- <mainClass></mainClass> -->
<!-- </manifest>-->
<!-- </archive>-->
<!-- </configuration>-->
<!-- <executions>-->
<!-- <execution>-->
<!-- <id>make-assembly</id>-->
<!-- <phase>package</phase>-->
<!-- <goals>-->
<!-- <goal>single</goal>-->
<!-- </goals>-->
<!-- </execution>-->
<!-- </executions>-->
<!-- </plugin>-->
<!-- <plugin>-->
<!-- <groupId>org.apache.maven.plugins</groupId>-->
<!-- <artifactId>maven-shade-plugin</artifactId>-->
<!-- <version>2.4.2</version>-->
<!-- <executions>-->
<!-- <execution>-->
<!-- <phase>package</phase>-->
<!-- <goals>-->
<!-- <goal>shade</goal>-->
<!-- </goals>-->
<!-- <configuration>-->
<!-- <filters>-->
<!-- <filter>-->
<!-- <artifact>*:*</artifact>-->
<!-- <excludes>-->
<!-- <exclude>META-INF/*.SF</exclude>-->
<!-- <exclude>META-INF/*.DSA</exclude>-->
<!-- <exclude>META-INF/*.RSA</exclude>-->
<!-- </excludes>-->
<!-- </filter>-->
<!-- </filters>-->
<!-- <transformers>-->
<!-- <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>-->
<!-- </transformers>-->
<!-- </configuration>-->
<!-- </execution>-->
<!-- </executions>-->
<!-- </plugin>-->
</plugins>
</build>
<reporting>
<plugins>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<configuration>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
</plugin>
</plugins>
</reporting>
</project>