import java.sql.Timestamp;
import java.util.Date;
import java.util.List;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.util.CollectionAccumulator;

public class TpcCompute2 {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().enableHiveSupport()
                .appName("TpcCompute2").master("local").getOrCreate();
        JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
        sc.setLogLevel("ERROR");

        // Each record fed to the UDF is "id_lonlat_tgsj", joined per plate (hphm) with '&'.
        spark.udf().register("getTpc", new ComputeUDF(), DataTypes.StringType);
        spark.sql("use traffic");
spark.sql("select hphm,concat_ws('&',collect_set(concat_ws('_',id,kk_lon_lat,tgsj))) as concatValue from t_cltgxx t where t.tgsj>'2015-01-01 00:00:00' group by hphm").show(false);
Dataset cltgxxDF =spark.sql("select hphm,concatValue from (select hphm,getTpc(concat_ws('&',collect_set(concat_ws('_',id,kk_lon_lat,tgsj)))) as concatValue from t_cltgxx t where t.tgsj>'2015-01-01 00:00:00' group by hphm) where concatValue is not null");
        cltgxxDF.show();

        // Create a collection accumulator and gather the matched id pairs on the driver.
        CollectionAccumulator<String> acc = sc.sc().collectionAccumulator();
        cltgxxDF.foreach(new ForeachFunction<Row>() {
            @Override
            public void call(Row row) throws Exception {
                acc.add(row.getAs("concatValue"));
            }
        });
        // For each matched pair "id1_id2", look up both records and append them to MySQL.
        List<String> values = acc.value();
        for (String id : values) {
            System.out.println("accValues:" + id);
            Dataset<Row> resultDF = spark.sql("select hphm,clpp,clys,tgsj,kkbh from t_cltgxx where id in ("
                    + id.split("_")[0] + "," + id.split("_")[1] + ")");
            resultDF.show();
            Dataset<Row> resultDF2 = resultDF
                    .withColumn("jsbh", functions.lit(new Date().getTime()))
                    .withColumn("create_time", functions.lit(new Timestamp(new Date().getTime())));
            resultDF2.show();
            resultDF2.write()
                    .format("jdbc")
                    .option("url", "jdbc:mysql://lin01.cniao5.com:3306/traffic?characterEncoding=UTF-8")
                    .option("dbtable", "t_tpc_result")
                    .option("user", "root")
                    .option("password", "123456")
                    .mode(SaveMode.Append)
                    .save();
        }
    }
}
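
// The listing above depends on ComputeUDF, which is not shown here. From the
// registration call (register("getTpc", new ComputeUDF(), DataTypes.StringType))
// it must be a UDF1<String, String>. Below is a minimal sketch, in its own file
// ComputeUDF.java, assuming the input is one plate's '&'-joined "id_lonlat_tgsj"
// records and the output is a pair of record ids joined by '_' (the driver code
// splits the result on '_'), or null when no pair is found. The pairing rule
// shown is a hypothetical placeholder, not the original matching logic.
import org.apache.spark.sql.api.java.UDF1;

public class ComputeUDF implements UDF1<String, String> {
    @Override
    public String call(String concatValue) throws Exception {
        // Split the '&'-joined records for one plate; each record is "id_lonlat_tgsj".
        String[] records = concatValue.split("&");
        if (records.length < 2) {
            return null; // a single sighting cannot form a suspicious pair
        }
        // Hypothetical rule: return the ids of the first two records. The real
        // UDF would compare checkpoint positions and pass times to decide
        // whether two sightings of the same plate are physically impossible.
        String firstId = records[0].split("_")[0];
        String secondId = records[1].split("_")[0];
        return firstId + "_" + secondId;
    }
}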