import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.*; import scala.Tuple2; import java.util.Arrays; import java.util.Iterator; public class SparkTest { public static void main(String[] args){ SparkConf sparkConf = new SparkConf().setAppName("AvgAgeCalculator"); // 提交到集群 // sparkConf.setMaster("spark://192.168.1.126:7077"); // idea 里面运行 sparkConf.setMaster("local[2]"); JavaSparkContext sc = new JavaSparkContext(sparkConf); //读取文件 // JavaRDD<String> dataFile = sc.textFile(args[0]); JavaRDD<String> dataFile = sc.textFile("/Users/zzy/Downloads/hive/teskshanda/DataFile2.txt"); //数据分片并取第二个数 JavaRDD<String> ageData = dataFile.flatMap(new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String s) throws Exception { return Arrays.asList(s.split(" ")[1]); } }); //求出所有年龄个数。 long count = ageData.count(); //转换数据类型 JavaRDD<Integer> ageDataInt = ageData.map(new Function<String, Integer>() { @Override public Integer call(String s) throws Exception { return Integer.parseInt(String.valueOf(s)); } }); //求出年龄的和 Integer totalAge = ageDataInt.reduce(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer x, Integer y) throws Exception { return x+y; } }); //平均值结果为double类型 Double avgAge = totalAge.doubleValue()/count; /*System.out.println(ageData.collect()); System.out.println(count);*/ System.out.println("Total Age:" + totalAge + "; Number of People:" + count ); System.out.println("Average Age is " + avgAge); } }
SparkTest
最新推荐文章于 2024-05-23 09:38:36 发布