1. Download apache-maven-3.5.0-bin.tar.gz and set the MAVEN_HOME environment variable.
2. Download hadoop-2.6.0.tar.gz and set the HADOOP_HOME environment variable.
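As an illustration, on a Unix-like shell both variables (plus PATH) could be set as below; the /opt paths are placeholders for wherever the archives were actually unpacked:

export MAVEN_HOME=/opt/apache-maven-3.5.0
export HADOOP_HOME=/opt/hadoop-2.6.0
export PATH=$MAVEN_HOME/bin:$HADOOP_HOME/bin:$PATH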
3. Configure the POM. The required dependencies are listed below; a minimal pom.xml sketch that wraps them follows the list.
<dependency> <!-- Spark SQL dependency -->
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency> <!-- Spark core dependency; "provided" keeps it out of the packaged artifact -->
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.2.0</version>
    <scope>provided</scope>
</dependency>
<dependency> <!-- Hadoop client dependency -->
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>com.google.collections</groupId>
    <artifactId>google-collections</artifactId>
    <version>1.0</version>
</dependency>
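For orientation, a minimal pom.xml sketch wrapping the four dependencies might look like this; the com.example coordinates are placeholders, and the compiler properties pin Java 8 because SimpleApp below uses lambda expressions:

<project xmlns="http://maven.apache.org/POM/4.0.0">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId> <!-- placeholder coordinates -->
    <artifactId>spark-demo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <!-- compile for Java 8 so the lambdas in SimpleApp work -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>
    <dependencies>
        <!-- the four <dependency> blocks listed above go here -->
    </dependencies>
</project>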
4. Create the SimpleApp class.
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SimpleApp {
    public static void main(String[] args) {
        // "local" runs Spark inside this JVM, so no cluster is required
        SparkConf conf = new SparkConf().setAppName("app demo").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // test.txt sits in the project root directory
        JavaRDD<String> lines = sc.textFile("test.txt");
        // map each line to its length, then sum all the lengths
        JavaRDD<Integer> lineLengths = lines.map(s -> s.length());
        int totalLength = lineLengths.reduce((a, b) -> a + b);
        System.out.println("length:" + totalLength);
        sc.stop();
    }
}
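The RDD example above only needs spark-core; the spark-sql_2.11 dependency comes into play with the newer SparkSession API. As a hedged sketch (the class name SimpleSqlApp and the casts to the Java-friendly overloads are illustrative choices, not part of the original), the same line-length sum can be written as:

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.ReduceFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class SimpleSqlApp {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("app demo")
                .master("local")
                .getOrCreate();
        // read test.txt as a Dataset<String>, one element per line
        Dataset<String> lines = spark.read().textFile("test.txt");
        // the casts select the Java overloads of map/reduce,
        // which would otherwise be ambiguous for a lambda
        Dataset<Integer> lineLengths =
                lines.map((MapFunction<String, Integer>) String::length, Encoders.INT());
        int totalLength = lineLengths.reduce((ReduceFunction<Integer>) (a, b) -> a + b);
        System.out.println("length:" + totalLength);
        spark.stop();
    }
}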
5. Run the Maven goals eclipse:clean eclipse:eclipse to generate the Eclipse project files.
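From the project root, assuming mvn is on the PATH:

mvn eclipse:clean eclipse:eclipse

eclipse:clean removes any stale Eclipse metadata and eclipse:eclipse regenerates the .project and .classpath files, after which the project can be imported into Eclipse.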
6. Run SimpleApp (in Eclipse: Run As > Java Application).