MonitorFlowAnalyze.java
package com.bjsxt.spark.skynet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.collections.IteratorUtils;
import org.apache.spark.Accumulator;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.api.java.Optional;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import com.alibaba.fastjson.JSONObject;
import com.bjsxt.spark.conf.ConfigurationManager;
import com.bjsxt.spark.constant.Constants;
import com.bjsxt.spark.dao.IAreaDao;
import com.bjsxt.spark.dao.IMonitorDAO;
import com.bjsxt.spark.dao.ITaskDAO;
import com.bjsxt.spark.dao.factory.DAOFactory;
import com.bjsxt.spark.domain.Area;
import com.bjsxt.spark.domain.MonitorState;
import com.bjsxt.spark.domain.Task;
import com.bjsxt.spark.domain.TopNMonitor2CarCount;
import com.bjsxt.spark.domain.TopNMonitorDetailInfo;
import com.bjsxt.spark.util.DateUtils;
import com.bjsxt.spark.util.ParamUtils;
import com.bjsxt.spark.util.SparkUtils;
import com.bjsxt.spark.util.StringUtils;
import com.spark.spark.test.MockData;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
/**
 * Checkpoint (monitor) traffic-flow analysis module
 * 1. Check the state of every checkpoint
 * 2. Get the top-N checkpoints by traffic volume
 * 3. Save the accumulator's five states to the database (normal checkpoints, abnormal checkpoints, normal cameras, abnormal cameras, details of the abnormal cameras)
 * 4. Save the detailed traffic information of the top-N checkpoints to the database
 * 5. Get the top-N checkpoints passed at high speed
 * 6. For the top-N high-speed checkpoints, get the 10 fastest vehicles at each checkpoint
 * 7. Area collision analysis
 * 8. Checkpoint collision analysis
 *
 * Example spark-submit command:
 * ./spark-submit --master spark://node1:7077,node2:7077
 * --class com.bjsxt.spark.skynet.MonitorFlowAnalyze
 * --driver-class-path ../lib/mysql-connector-java-5.1.6.jar:../lib/fastjson-1.2.11.jar
 * --jars ../lib/mysql-connector-java-5.1.6.jar,../lib/fastjson-1.2.11.jar
 * ../lib/ProduceData2Hive.jar
 * 1
 *
 * @author root
 *
 */
public class MonitorFlowAnalyze {
public static void main(String[] args) {
/**
 * Decide whether the application runs locally or on a cluster
 */
JavaSparkContext sc = null;
SparkSession spark = null;
Boolean onLocal = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
if(onLocal){
// Build the Spark runtime configuration
SparkConf conf = new SparkConf()
.setAppName(Constants.SPARK_APP_NAME)
// .set("spark.sql.shuffle.partitions", "300")
// .set("spark.default.parallelism", "100")
// .set("spark.storage.memoryFraction", "0.5")
// .set("spark.shuffle.consolidateFiles", "true")
// .set("spark.shuffle.file.buffer", "64")
// .set("spark.shuffle.memoryFraction", "0.3")
// .set("spark.reducer.maxSizeInFlight", "96")
// .set("spark.shuffle.io.maxRetries", "60")
// .set("spark.shuffle.io.retryWait", "60")
// .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
// .registerKryoClasses(new Class[]{SpeedSortKey.class})
;
/**
 * The Spark master used at runtime is decided by the configuration file
 */
conf.setMaster("local");
sc = new JavaSparkContext(conf);
// This does not create a second SparkContext: getOrCreate() wraps the existing SparkContext in a SparkSession, because two SparkContexts cannot coexist.
spark = SparkSession.builder().getOrCreate();
/**
 * Generate mock data for local testing; when running on a cluster, the Hive tables are queried directly.
 * The local mock data is registered as temporary tables:
 * monitor_flow_action  -- fact table: all monitored traffic-flow records
 * monitor_camera_info  -- reference table: the standard checkpoint-to-camera mapping
 */
MockData.mock(sc, spark);
}else{
System.out.println("++++++++++++++++++++++++++++++++++++++开启hive的支持");
/**
 * For "SELECT * FROM table1 JOIN table2 ON (join condition)", Spark SQL automatically broadcasts
 * any table smaller than spark.sql.autoBroadcastJoinThreshold (10 MB by default) to the executor
 * nodes, so the join avoids the shuffle stage and runs more efficiently.
 *
 * config("spark.sql.autoBroadcastJoinThreshold", "1048576000"); // unit: bytes
 */
spark = SparkSession.builder().appName(Constants.SPARK_APP_NAME).enableHiveSupport().getOrCreate();
sc = new JavaSparkContext(spark.sparkContext());
spark.sql("use traffic");
}
/**
 * Get the taskId for spark.local.taskId.monitorFlow from the configuration file my.properties
 * (on a cluster it is taken from the program arguments)
 */
long taskId = ParamUtils.getTaskIdFromArgs(args, Constants.SPARK_LOCAL_TASKID_MONITOR);
if(taskId == 0L){
System.out.println("args is null.....");
return;
}
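// Illustrative example (values are assumptions, not from this listing): in local mode the task id
// usually comes from my.properties, e.g.
//   spark.local.taskId.monitorFlow=1
// while on a cluster it is passed as the first program argument (the trailing "1" in the
// spark-submit command shown in the class comment).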
/**
 * Get an ITaskDAO instance and load the row found by taskId into a Task (custom domain) object
 */
ITaskDAO taskDAO = DAOFactory.getTaskDAO();
Task task = taskDAO.findTaskById(taskId);
if(task == null){
return;
}
/**
 * task.getTaskParams() is a JSON-formatted string;
 * parse the task_parm string into a JSON object (taskParamsJsonObject).
 */
JSONObject taskParamsJsonObject = JSONObject.parseObject(task.getTaskParams());
/**
 * Query monitor_flow_action using the params (JSON string):
 * fetch the traffic-flow records captured within the specified date range and return them as a JavaRDD<Row>
 */
JavaRDD<Row> cameraRDD = SparkUtils.getCameraRDDByDateRange(spark, taskParamsJsonObject);
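// Illustrative example of a task_parm value (the field names are an assumption, not taken from this listing):
//   {"startDate":["2022-01-01"],"endDate":["2022-01-02"]}
// SparkUtils.getCameraRDDByDateRange builds the monitor_flow_action date-range query from such parameters.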
/**
 * Create a custom accumulator
 * (a minimal sketch of such an accumulator is shown after main(), below)
 */
// Accumulator<String> monitorAndCameraStateAccumulator =
// sc.accumulator("", new MonitorAndCameraStateAccumulator());
SelfDefineAccumulator monitorAndCameraStateAccumulator = new SelfDefineAccumulator();
spark.sparkContext().register(monitorAndCameraStateAccumulator,"SelfAccumulator");
/**
 * Convert the Row RDD into a key/value RDD   K: monitor_id   V: Row
 */
JavaPairRDD<String, Row> monitor2DetailRDD = getMonitor2DetailRDD(cameraRDD);
/**
 * Persist monitor2DetailRDD, since it is reused several times below
 */
monitor2DetailRDD = monitor2DetailRDD.cache();
/**
 * Group by checkpoint id; each monitor_id maps to all of its Row records.
 * The data contains 9 checkpoint ids in total, so groupByKey yields 9 groups.
 */
JavaPairRDD<String, Iterable<Row>> monitorId2RowsRDD = monitor2DetailRDD.groupByKey();
/**
 * Iterate over the grouped RDD and concatenate each checkpoint's information into a string.
 * There are only 9 monitorIds in the data, so the aggregated result also contains 9 records, in the form
 * monitor_id=|cameraIds=|area_id=|camera_count=|carCount=
 * e.g.:
 * ("0005","monitorId=0005|cameraIds=09200,03243,02435,03232|cameraCount=4|carCount=100")
 *
 */
JavaPairRDD<String, String> aggregateMonitorId2DetailRDD = aggreagteByMonitor(monitorId2RowsRDD);
/**
 * Check the checkpoint states.
 * carCount2MonitorRDD
 * K: car_count   V: monitor_id
 * RDD(total traffic volume at the checkpoint, checkpoint id)
 */
JavaPairRDD<Integer, String> carCount2MonitorRDD =
checkMonitorState(sc,spark,aggregateMonitorId2DetailRDD,taskId,taskParamsJsonObject,monitorAndCameraStateAccumulator);
/**
 * An action operator that triggers all of the transformations above
 */
carCount2MonitorRDD.count();
/**
 * Save the five states collected by the accumulator into the monitor_state table
 */
saveMonitorState(taskId,monitorAndCameraStateAccumulator);
/************ vehicle trajectories ** cameraRDD ************/
// From the full data set, find the vehicles that passed checkpoint 0001
List<String> cars = cameraRDD.filter(new Function<Row, Boolean>() {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public Boolean call(Row row) {
return "0001".equals(row.getAs("monitor_id")+"");
}
}).map(new Function<Row, String>() {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public String call(Row row) {
return row.getAs("car")+"";
}
}).distinct().take(20);
final Broadcast<List<String>> bc = sc.broadcast(cars);
cameraRDD.mapToPair(new PairFunction<Row, String, Row>() {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public Tuple2<String, Row> call(Row row) {
return new Tuple2<>(row.getAs("car")+"", row);
}
}).filter(new Function<Tuple2<String,Row>, Boolean>() {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public Boolean call(Tuple2<String, Row> tuple) {
return bc.value().contains(tuple._1);
}
}).groupByKey().foreach(new VoidFunction<Tuple2<String,Iterable<Row>>>() {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public void call(Tuple2<String, Iterable<Row>> tp) {
String car = tp._1;
@SuppressWarnings("unchecked")
List<Row> rowList = IteratorUtils.toList(tp._2.iterator());
Collections.sort(rowList,new Comparator<Row>() {
@Override
public int compare(Row r1, Row r2) {
String r1Time = r1.getAs("action_time")+"";
String r2Time = r2.getAs("action_time")+"";
return DateUtils.before(r1Time, r2Time)?-1:1;
}
});
String carTracker = "";
for(Row row:rowList) {
carTracker += "-->"+row.getAs("monitor_id")+"_"+row.getAs("action_time");
}
System.out.println("car = "+car +",carTracker = "+carTracker.substring(3));
}
});
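// Example of the printed trajectory (illustrative plate and timestamps only):
//   car = 京A12345, carTracker = 0001_2022-01-01 10:00:05-->0003_2022-01-01 10:07:30-->0005_2022-01-01 10:15:12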
/**************************/
/**
 * Get the top-N checkpoints by traffic volume
 * and save them into the topn_monitor_car_count table.
 * Returns a key/value RDD   K: monitor_id   V: monitor_id,
 * i.e. the top-N (monitor_id, monitor_id) pairs.
 */
JavaPairRDD<String, String> topNMonitor2CarFlow =
getTopNMonitorCarFlow(sc,taskId,taskParamsJsonObject,carCount2MonitorRDD);
/**
 * Get the detailed traffic records of the top-N checkpoints and save them into the topn_monitor_detail_info table
 */
getTopNDetails(taskId,topNMonitor2CarFlow,monitor2DetailRDD);
/**
 * Get the top-N checkpoints that vehicles pass at high speed
 */
List<String> top5MonitorIds = speedTopNMonitor(monitorId2RowsRDD);
for (String monitorId : top5MonitorIds) {
System.out.println("车辆经常高速通过的卡扣 monitorId:"+monitorId);
}
/**
 * For each of the high-speed top-N checkpoints, get the 10 fastest vehicles and save them into the top10_speed_detail table
 */
getMonitorDetails(sc,taskId,top5MonitorIds,monitor2DetailRDD);
/**
 * Area collision analysis; the result is printed directly.
 * "01" and "02" are two area ids.
 */
CarPeng(spark,taskParamsJsonObject,"01","02");
/**
 * Checkpoint collision analysis; the result is printed directly
 */
areaCarPeng(spark,taskParamsJsonObject);
System.out.println("******All is finished*******");
sc.close();
}
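/**
 * Minimal sketch (an assumption, not the project's actual class) of what a custom string
 * accumulator such as the SelfDefineAccumulator registered in main() above might look like.
 * It simply concatenates the state strings added on the executors; it would be registered the
 * same way: spark.sparkContext().register(acc, "SelfAccumulator").
 */
static class MonitorStateAccumulatorSketch extends org.apache.spark.util.AccumulatorV2<String, String> {
    private static final long serialVersionUID = 1L;
    private StringBuilder result = new StringBuilder();

    @Override
    public boolean isZero() {
        // the accumulator is "zero" when nothing has been added yet
        return result.length() == 0;
    }

    @Override
    public org.apache.spark.util.AccumulatorV2<String, String> copy() {
        MonitorStateAccumulatorSketch copy = new MonitorStateAccumulatorSketch();
        copy.result = new StringBuilder(result);
        return copy;
    }

    @Override
    public void reset() {
        result = new StringBuilder();
    }

    @Override
    public void add(String v) {
        // append one piece of monitor/camera state produced on an executor
        result.append(v).append("|");
    }

    @Override
    public void merge(org.apache.spark.util.AccumulatorV2<String, String> other) {
        // combine partial results from different partitions
        result.append(other.value());
    }

    @Override
    public String value() {
        return result.toString();
    }
}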
private static void lineCar(JavaSparkContext javaSparkContext, final String FiltermonitorId, JavaRDD<Row> cameraRDD) {
System.out.println("******start lineCar*******");
JavaRDD<String> carsInFilterMonitorIdRDD = cameraRDD.mapToPair(new PairFunction<Row, String, String>() {
private static final long serialVersionUID = 1L;
@Override
public Tuple2<String, String> call(Row row) {
return new Tuple2<String, String>(row.getString(1), row.getString(3));
}
}).filter(new Function<Tuple2<String, String>, Boolean>() {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public Boolean call(Tuple2<String, String> v1) {
if (FiltermonitorId.equals(v1._1)) {
    return true;
}
return false;
}
});
// ... (the remainder of lineCar is cut off in the original listing)