编写java应用程序:
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
/**
 * Minimal Spark SQL example: counts how many lines of a text file contain
 * the substring "test".
 *
 * <p>Submit with spark-submit (the Spark dependency has "provided" scope,
 * so it is supplied by the cluster/runtime, not bundled in the jar).
 */
public class SimpleApp {
    public static void main(String[] args) {
        // Placeholder path — replace with a real file on your system before running.
        String logFile = "你的文件路径/a.txt"; // Should be some file on your system
        SparkSession spark = SparkSession.builder()
                .appName("Simple Application")
                .getOrCreate();
        try {
            // cache() keeps the dataset in memory; harmless for one action,
            // useful if the dataset is reused.
            Dataset<String> logData = spark.read().textFile(logFile).cache();
            // The explicit FilterFunction cast disambiguates the overloaded
            // Dataset.filter(...) for a Java lambda.
            long numTestLines =
                    logData.filter((FilterFunction<String>) s -> s.contains("test")).count();
            // Fixed: the original message started with a stray ", " left over
            // from the two-counter quick-start example; the variable was also
            // misleadingly named numBs.
            System.out.println("Lines with test: " + numTestLines);
        } finally {
            // Always release the SparkContext, even if the job throws.
            spark.stop();
        }
    }
}
对应pom:
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>3.4.1</version>
<scope>provided</scope>
</dependency>
运行:
./bin/spark-submit \
--class "SimpleApp" \
--master local[4] \
你的jar包地址