模拟数据的 Java 代码:
// 模拟数据
// Among 100,000 people, count the young men and young women to see whether
// the male/female ratio is balanced.
//
// Writes one record per line in the form "<id> <age> <M|F>" with CRLF line endings.
ThreadLocalRandom random = ThreadLocalRandom.current();
// The file is opened in overwrite mode (not append) so that re-running the generator
// always leaves exactly 100,000 records with unique ids instead of stacking duplicates.
// The stream is buffered so the loop does not issue 100,000 tiny unbuffered writes, and
// try-with-resources closes (and flushes) it even if a write fails.
try (java.io.OutputStream out = new java.io.BufferedOutputStream(
        new FileOutputStream("C:\\Users\\26401\\Desktop\\data.txt"))) {
    for (int count = 0; count < 100000; count++) {
        // nextInt(18, 28) has an exclusive upper bound, so ages fall in 18..27.
        String str = count + " " + random.nextInt(18, 28) + " " + (random.nextBoolean() ? 'M' : 'F');
        // Explicit charset: the output must not depend on the platform default encoding.
        out.write((str + "\r\n").getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }
} catch (IOException e) {
    // Best-effort demo script: report the failure and carry on.
    e.printStackTrace();
}
依赖
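<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>test</groupId>
  <artifactId>test</artifactId>
  <version>1.0.0</version>
  <name>test</name>
  <description>Test project for spring boot mybatis</description>
  <packaging>jar</packaging>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    <java.version>1.8</java.version>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>
  <dependencies>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.12</artifactId>
      <version>2.4.0</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.25</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <configuration>
          <archive>
            <manifest>
              <addClasspath>true</addClasspath>
              <useUniqueVersions>false</useUniqueVersions>
              <classpathPrefix>lib/</classpathPrefix>
            </manifest>
          </archive>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>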
Java 代码:
package test;
import java.io.Serializable;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Minimal Spark job that reads {@code hdfs://master:9000/data.txt} as text and logs how
 * many lines contain the character {@code "M"}.
 *
 * <p>The master URL, deploy mode, application name and input path are all hard-coded in
 * {@link #main(String[])}.
 */
public class App implements Serializable
{
    private static final long serialVersionUID = -7114915627898482737L;

    // Held in a static field so the filter function below references it through the class
    // instead of capturing it as a local variable inside the serialized closure.
    private static final Logger logger = LoggerFactory.getLogger(App.class);

    /**
     * Configures the Spark context, counts the input lines that contain {@code "M"} and
     * logs the result between two separator lines.
     *
     * @param args command-line arguments; not used
     * @throws Exception if creating the context or running the job fails
     */
    public static void main(String[] args) throws Exception {
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("spark://master:7077");
        sparkConf.set("spark.submit.deployMode", "cluster");
        sparkConf.setAppName("FirstTest");
        // try-with-resources closes the context even when the job throws, rather than
        // only on the success path.
        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
            JavaRDD<String> file = sc.textFile("hdfs://master:9000/data.txt");
            // The explicit type arguments are required: with a raw Function the method
            // below would not override call(Object) and the class would not compile.
            JavaRDD<String> male = file.filter(new Function<String, Boolean>() {
                private static final long serialVersionUID = 1L;
                @Override
                public Boolean call(String s) throws Exception {
                    // Logs every input line before testing it.
                    logger.info(s);
                    return s.contains("M");
                }
            });
            logger.info("**************************************");
            logger.info(String.valueOf(male.count())); // 49991 in the author's run
            logger.info("**************************************");
        }
        // The other RDD APIs are straightforward; look them up in the documentation
        // or simply explore them yourself.
    }
}
运行
1. 将生成的测试数据 data.txt 上传至 HDFS 根目录(代码读取的路径为 hdfs://master:9000/data.txt)
2. 将打包的jar上传到master机器
3. 运行 bin/spark-submit --master spark://master:7077 --class test.App test-1.0.0.jar
4. 进入spark的ui界面可以清楚的看到打印的消息