package com.ultrapower.main;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.ultrapower.bean.MyHashSet;
import com.ultrapower.bigdata.elasticsearch.ElasticSearchUtils;
import com.ultrapower.bigdata.kafka.KafkaOffsetUtil;
import com.ultrapower.bigdata.kafka.KafkaTool;
import com.ultrapower.bigdata.redis.JavaRedisClient;
import com.ultrapower.bigdata.spark.JavaSparkSessionSingleton;
import com.ultrapower.bigdata.zookeeper.MyZkSerializer;
import com.ultrapower.db.ConnectionPool;
import com.ultrapower.db.DbTool;
import com.ultrapower.model.CoreConfig;
import com.ultrapower.model.DbConfig;
import com.ultrapower.util.StringUtil;
import com.ultrapower.util.TimeUtil;
import kafka.utils.ZKGroupTopicDirs;
import org.I0Itec.zkclient.ZkClient;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.*;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.StreamingContext;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka010.HasOffsetRanges;
import org.apache.spark.streaming.kafka010.OffsetRange;
import org.apache.spark.streaming.scheduler.StatsReportListener;
import org.apache.spark.util.LongAccumulator;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.transport.TransportClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import redis.clients.jedis.Jedis;
import scala.Tuple2;
import scala.Tuple3;
import scala.reflect.ClassTag;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.*;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/**
 * @Description: Spark Streaming job that cleans BOMC platform/control logs and
 * stitches per-stage logs into complete business transactions.
 * @Author: ydw
 * @Date: Created in 11:00 2018/6/27
 * @Modified by () on ().
 * Version 2: also handles stage logs that do not carry a business identifier.
 */
public class LogOfBOMCDataManagementPlatformAndControlStitching {
private static final long serialVersionUID = 1L;
private static final Logger log = LoggerFactory.getLogger(LogOfBOMCDataManagementPlatformAndControlStitching.class);
public static void main(String[] args) {
//System.setProperty("hadoop.home.dir", "D:\\hadoop-2.8.4");
SparkSession spark = JavaSparkSessionSingleton.getInstance();
//===================================== spark sql ================================================================
// Load each data-model table from the database as a DataFrame.
// Business table.
Dataset<Row> chinamobile_bus = DbTool.getTable(spark, DbConfig.BUSTABLE);
// Stage table.
Dataset<Row> chinamobile_stage = DbTool.getTable(spark, DbConfig.STAGETABLE);
// Field table.
Dataset<Row> chinamobile_field = DbTool.getTable(spark, DbConfig.FIELDTABLE);
// Stage-chaining (correlation) table.
Dataset<Row> chinamobile_stageRelative = DbTool.getTable(spark, DbConfig.STAGERELATIVETABLE);
// Business cost-time / timeout rule table.
Dataset<Row> chinamobile_busTime = DbTool.getTable(spark, DbConfig.BUSTIMETABLE);
// Business status rule table.
Dataset<Row> chinamobile_busStatus = DbTool.getTable(spark, DbConfig.BUSSTATUSTABLE);
// Cache the model DataFrames — they are queried several times below.
chinamobile_bus.cache();
chinamobile_stage.cache();
chinamobile_field.cache();
chinamobile_stageRelative.cache();
chinamobile_busStatus.cache();
chinamobile_busTime.cache();
// Register the model tables as temp views so they can be joined with Spark SQL.
chinamobile_bus.createOrReplaceTempView("chinamobile_bus");
chinamobile_stage.createOrReplaceTempView("chinamobile_stage");
chinamobile_field.createOrReplaceTempView("chinamobile_field");
chinamobile_stageRelative.createOrReplaceTempView("chinamobile_stageRelative");
chinamobile_busStatus.createOrReplaceTempView("chinamobile_busStatus");
chinamobile_busTime.createOrReplaceTempView("chinamobile_busTime");
// Collect the model joins to the driver; they are broadcast to executors below.
List<Row> bus = spark.sql("select busName from chinamobile_bus where pid = 0").collectAsList();
log.warn("一共查询到" + bus.size() + "种一级业务");
List<Row> stage = spark.sql("select stageId,stageName,backStage from chinamobile_stage").collectAsList();
List<Row> busStage = spark.sql("select b.busName,s.stageName,s.logMaxLength from chinamobile_bus b join chinamobile_stage s ON b.busId = s.busId ").collectAsList();
//List<Row> stageField = spark.sql("select * from chinamobile_stage s join chinamobile_field f on s.stageId=f.stageId ").collectAsList();
List<Row> busStageField = spark.sql("select b.busName,s.stageName,f.fieldName,f.isKeep,f.isNull,f.isBusType,f.type from chinamobile_bus b join chinamobile_stage s ON b.busId = s.busId join chinamobile_field f on s.stageId=f.stageId ").collectAsList();
List<Row> stageRelative = spark.sql("select * from chinamobile_stagerelative ").collectAsList();
List<Row> busBusStatus = spark.sql("select b.busName,s.stageIds,s.keyInDb from chinamobile_bus b join chinamobile_busStatus s ON b.busId = s.busId ").collectAsList();
List<Row> busBusTime = spark.sql("select b.busName,t.stageIds,t.keyInDb,t.costTimeRule from chinamobile_bus b join chinamobile_busTime t on b.busId = t.busId").collectAsList();
List<Row> codeBusType = spark.sql("select b.code,b.busName,s.stageName,f.fieldName from (select stageId,fieldName from chinamobile_field where isBusType = 1) f join chinamobile_stage s ON f.stageId = s.stageId join chinamobile_bus b on s.busId = b.busId").collectAsList();
// Broadcast the (small) model lists to every executor.
// NOTE(review): raw Broadcast<List> — unchecked, but changing the generic type here
// would ripple into every closure that reads these broadcasts; left as-is.
ClassTag<List> tag = scala.reflect.ClassTag$.MODULE$.apply(List.class);
Broadcast<List> stageBroadCast = spark.sparkContext().broadcast(stage, tag);
Broadcast<List> busStageBroadCast = spark.sparkContext().broadcast(busStage, tag);
Broadcast<List> busStageFieldBroadCast = spark.sparkContext().broadcast(busStageField, tag);
Broadcast<List> stageRelativeBroadCast = spark.sparkContext().broadcast(stageRelative, tag);
Broadcast<List> busBusStatusBroadCast = spark.sparkContext().broadcast(busBusStatus, tag);
Broadcast<List> busBusTimeBroadCast = spark.sparkContext().broadcast(busBusTime, tag);
Broadcast<List> codeBusTypeBroadCast = spark.sparkContext().broadcast(codeBusType, tag);
//===================================== spark streaming ==========================================================
// Create the streaming context on top of the existing SparkContext.
SparkContext sparkContext = spark.sparkContext();
StreamingContext sc = new StreamingContext(sparkContext, Durations.seconds(CoreConfig.SPARK_STREAMING_SECONDS));
JavaStreamingContext jsc = new JavaStreamingContext(sc);
// Driver-side accumulators (Spark 2.0 API).
final LongAccumulator kafkaAccumulator = sparkContext.longAccumulator("Records pulled from kafka");
final LongAccumulator repeatedAccumulator = sparkContext.longAccumulator("Records repeated in the same stage");
/**
 * Periodically persist the accumulator counters to the database.
 *
 * Fix: the original borrowed ONE pooled connection before scheduling and then
 * called ConnectionPool.returnConnection(connection) at the end of EVERY run,
 * so from the second run on the task kept writing on a connection that other
 * pool users could borrow concurrently; the Statement was never closed either.
 * The connection is now borrowed and returned per run and the Statement is
 * closed via try-with-resources.
 * TODO(review): counters are driver-side; confirm behavior when several driver
 * instances run in a clustered deployment (original noted a threading issue).
 */
long uuid = System.currentTimeMillis();
Runnable runnable = new Runnable() {
    @Override
    public void run() {
        long writeTime = System.currentTimeMillis();
        // Values are numeric (timestamps / accumulator longs), not user input.
        String sql = "insert into " + DbConfig.RECORDCOUNTTABLE + "(UUID,ENDTIME,SPARKTOTAL,INSPECTFLAG,REPEATEDNUM) values("
                + uuid + "," + writeTime + "," + kafkaAccumulator.value() + "," + 0 + "," + repeatedAccumulator.value() + ")";
        // Borrow a connection for this run only and always give it back.
        Connection connection = ConnectionPool.getConnection();
        try (Statement statement = connection.createStatement()) {
            statement.executeUpdate(sql);
        } catch (SQLException e) {
            log.warn("failed to persist accumulator counters", e);
        } finally {
            ConnectionPool.returnConnection(connection);
        }
    }
};
ScheduledExecutorService service = Executors.newSingleThreadScheduledExecutor();
// First run after 5 seconds, then every COUNTPERIOD seconds.
service.scheduleAtFixedRate(runnable, 5, DbConfig.COUNTPERIOD, TimeUnit.SECONDS);
//todo
// ZooKeeper directory where this consumer group's offsets for the topic live.
ZKGroupTopicDirs zkGroupTopicDirs = new ZKGroupTopicDirs(CoreConfig.ZK_KAFKA_OFFSET_NAME, CoreConfig.KAFKA_TOPIC);
final String zkTopicPath = zkGroupTopicDirs.consumerOffsetDir();
/**
 * Create the direct Kafka DStream.
 * Calling cache() on kafkaLogs itself fails with:
 * ERROR KafkaRDD: Kafka ConsumerRecord is not serializable. Use .map to extract fields before calling .persist or .window
 * Reason: ConsumerRecord does not implement Serializable, so it cannot be persisted;
 * fields are extracted with map() first (see kafkaLines below).
 */
JavaInputDStream<ConsumerRecord<String, String>> kafkaLogs = KafkaTool.createDirectStream(jsc, zkTopicPath);
/**
 * For every record pulled from Kafka:
 * 1. count it via kafkaAccumulator;
 * 2. stamp it with the current time ("checkTimeStamp");
 * 3. split the raw "msg" payload out of the log: the log keeps a generated
 *    uuid in its "msg" field and the payload travels separately, so it can be
 *    indexed on its own. Emits <log json, msgUuid, msg payload>.
 */
JavaDStream<Tuple3<String, String, String>> kafkaLines = kafkaLogs.map(new Function<ConsumerRecord<String, String>, Tuple3<String, String, String>>() {
    @Override
    public Tuple3<String, String, String> call(ConsumerRecord<String, String> v1) throws Exception {
        JSONObject jsonObject = JSONObject.parseObject(v1.value());
        jsonObject.put("checkTimeStamp", System.currentTimeMillis());
        kafkaAccumulator.add(1);
        // 2018-09-21: the stage log's "msg" field goes to its own ES index; the
        // log keeps only the generated uuid pointing at the payload.
        String msgUuid = null;
        String msgValue = null;
        if (jsonObject.containsKey("msg")) {
            msgUuid = UUID.randomUUID().toString();
            msgValue = jsonObject.getString("msg");
            jsonObject.put("msg", msgUuid);
        }
        return new Tuple3<>(jsonObject.toJSONString(), msgUuid, msgValue);
    }
});
/**
 * Possible performance knob: repartition before the heavy stages (disabled).
 */
//JavaDStream<Tuple3<String, String, String>> repartitionLines = kafkaLines.repartition(CoreConfig.SPARK_STREAMING_CORES * 3);
kafkaLines.cache();
/**
 * Write the extracted msg payloads to their own ES index; the index name uses
 * the same time-bucketed scheme as the business-log index, prefixed with "msg".
 */
kafkaLines.foreachRDD(new VoidFunction<JavaRDD<Tuple3<String, String, String>>>() {
    @Override
    public void call(JavaRDD<Tuple3<String, String, String>> tuple3JavaRDD) throws Exception {
        tuple3JavaRDD.foreachPartition(new VoidFunction<Iterator<Tuple3<String, String, String>>>() {
            @Override
            public void call(Iterator<Tuple3<String, String, String>> tuple3Iterator) throws Exception {
                // One ES client and one bulk builder per partition.
                TransportClient client1 = ElasticSearchUtils.getTransportClient();
                BulkRequestBuilder bulkRequestBuilder1 = client1.prepareBulk();
                while (tuple3Iterator.hasNext()) {
                    Tuple3<String, String, String> next = tuple3Iterator.next();
                    String msgId = next._2();
                    String value = next._3();
                    Map<String, String> map = new HashMap<>();
                    // Only records that actually carried a msg payload are indexed.
                    if (msgId != null && value != null && !"".equals(value)) {
                        map.put("msgId", msgId);
                        map.put("msg", value);
                        String index = "msg" + TimeUtil.getTableName(CoreConfig.BUSLOG_INTERVAL);
                        IndexRequestBuilder doc = client1.prepareIndex(index, "/all", msgId).setSource(map);
                        bulkRequestBuilder1.add(doc);
                        // Flush every ES_BULKSIZE queued actions.
                        // NOTE(review): the builder is never reset after a flush, so
                        // numberOfActions keeps growing and later flushes appear to
                        // re-submit earlier actions — confirm whether a fresh
                        // prepareBulk() is expected here.
                        if (bulkRequestBuilder1.numberOfActions() % CoreConfig.ES_BULKSIZE == 0 && bulkRequestBuilder1.numberOfActions() != 0) {
                            BulkResponse bulkResponse = bulkRequestBuilder1.execute().actionGet();
                            log.warn(TimeUtil.formattedCurrentTime() + " 向es提交了 " + bulkRequestBuilder1.numberOfActions() + " 条操作");
                            if (bulkResponse.hasFailures()) {
                                log.warn("error in doing index request: " + bulkResponse.buildFailureMessage());
                            }
                        }
                    }
                }
                /**
                 * Flush the tail of the batch (fewer than ES_BULKSIZE actions).
                 */
                if (bulkRequestBuilder1.numberOfActions() > 0) {
                    BulkResponse bulkResponse = bulkRequestBuilder1.execute().actionGet();
                    log.warn(TimeUtil.formattedCurrentTime() + " 向es提交了 " + bulkRequestBuilder1.numberOfActions() + " 条操作");
                    if (bulkResponse.hasFailures()) {
                        log.warn("error in doing index request: " + bulkResponse.buildFailureMessage());
                    }
                }
            }
        });
    }
});
/**
 * Project the tuple stream down to just the stage-log json (first element);
 * the msg uuid/payload have already been handled by the msg-index stage.
 */
JavaDStream<String> lineLog = kafkaLines.map(tuple -> tuple._1());
/**
 * Stitch a business transaction: put all stage logs of the same transaction
 * into one Redis hash.
 * 1. Look up the data model for this stage's correlation-field values, e.g. f1, f2.
 * 2. Probe Redis with f1/f2 as keys:
 *    a) neither exists yet — no related log so far; cache this stage log:
 *       1. key/value pairs (with TTL):  f1 -> uuid, f2 -> uuid;
 *       2. a hash (with TTL):           uuid: { f1 -> stage log, f2 -> stage log }.
 *    b) exactly one of f1/f2 exists;
 *    c) both exist.
 *    b) and c) are handled together: take the uuid (minUuid) that any existing
 *    f maps to, then
 *       1. add this stage log into the hash keyed by minUuid;
 *       2. merge the hashes of the remaining f' uuids into minUuid, so minUuid
 *          ends up holding the most complete set of correlated stage logs
 *          (hash values are "&&&"-joined stage-log json strings), e.g.
 *          minUuid: { f1: log1 &&& log2, f2: log2 &&& log3, ... };
 *       3. repoint every f' to minUuid (refreshing the TTL);
 *       4. emit <minUuid, set of superseded uuids> — the superseded uuids must
 *          be deleted from ES later.
 */
JavaPairDStream<String, Set<String>> busUuidAndDropUuid = lineLog.mapPartitionsToPair(new PairFlatMapFunction<Iterator<String>, String, Set<String>>() {
    @Override
    public Iterator<Tuple2<String, Set<String>>> call(Iterator<String> stringIterator) throws Exception {
        List<Row> stageRelative = stageRelativeBroadCast.value();
        /**
         * Borrow one pooled Redis connection per partition.
         * NOTE(review): if the pool is unreachable, jedis stays null and the
         * first jedis.exists(...) below throws an NPE — confirm intended handling.
         */
        Jedis jedis = null;
        try {
            JavaRedisClient.makePool(CoreConfig.REDIS_IP, CoreConfig.REDIS_PORT, CoreConfig.REDIS_TIMEOUT, CoreConfig.REDIS_MAXTOTAL, CoreConfig.REDIS_MAXIDLE, CoreConfig.REDIS_MINIDLE);
            jedis = JavaRedisClient.getPool().getResource();
        } catch (Exception e) {
            log.warn(TimeUtil.formattedCurrentTime() + " redis连接异常");
        }
        /**
         * Result tuples for this partition.
         */
        List<Tuple2<String, Set<String>>> list = new ArrayList<>();
        /**
         * Stitching logic per stage log.
         */
        while (stringIterator.hasNext()) {
            JSONObject jsonObject = JSON.parseObject(stringIterator.next());
            String stageName = jsonObject.getString(CoreConfig.STAGEFIELDNAME);
            /**
             * Values of every correlation field carried by this stage log.
             */
            Set<String> keyList = new HashSet<>();
            /**
             * chinamobile_stagerelative rows whose sourceStageName is this stage.
             */
            List<Row> rows = getRelativedFieldOfTheStage(stageRelative, "sourceStageName", stageName);
            /**
             * chinamobile_stagerelative rows whose relatedStageName is this stage.
             */
            List<Row> rows2 = getRelativedFieldOfTheStage(stageRelative, "relatedStageName", stageName);
            if (rows.size() == 0 && rows2.size() == 0) {
                log.warn("关联表中未查到" + stageName + "环节的关联信息");
            }
            /**
             * sourceFieldName: correlation-field name taken from the source-stage rows;
             * key: the value that field has in this log.
             */
            if (rows.size() > 0) {
                for (int i = 0; i < rows.size(); i++) {
                    String sourceFieldName = null;
                    try {
                        sourceFieldName = rows.get(i).getAs("sourceFieldName").toString();
                    } catch (Exception e) {
                        // Field missing/null in the model row; skip it.
                    }
                    if (sourceFieldName != null && !"".equals(sourceFieldName)) {
                        String key = LogOfBOMCUtil.relativeFieldValueAsKey(sourceFieldName, jsonObject);
                        if (key != null) {
                            keyList.add(key);
                        }
                    }
                }
            }
            /**
             * relatedFieldName: correlation-field name taken from the related-stage rows;
             * key: the value that field has in this log.
             */
            if (rows2.size() > 0) {
                for (int i = 0; i < rows2.size(); i++) {
                    String relatedFieldName = null;
                    try {
                        relatedFieldName = rows2.get(i).getAs("relatedFieldName").toString();
                    } catch (Exception e) {
                        // Field missing/null in the model row; skip it.
                    }
                    if (relatedFieldName != null && !"".equals(relatedFieldName)) {
                        String key = LogOfBOMCUtil.relativeFieldValueAsKey(relatedFieldName, jsonObject);
                        if (key != null) {
                            keyList.add(key);
                        }
                    }
                }
            }
            /**
             * flag
             * true:  at least one of this stage's correlation values already exists in Redis;
             * false: none of them exist yet.
             */
            boolean flag = false;
            for (String k : keyList) {
                if (jedis.exists(k)) {
                    flag = true;
                    break;
                }
            }
            String minUuid = null;
            Set<String> dropUuidInEs = new HashSet<>();
            /**
             * false: no correlation value is known yet —
             * 1. create key/value pairs (correlation value -> uuid) so any later
             *    field lookup finds the hash holding the whole transaction;
             * 2. create a hash keyed by uuid (field = correlation value, value = stage log).
             */
            if (!flag) {
                minUuid = UUID.randomUUID().toString();
                if (!keyList.isEmpty()) {
                    for (String k : keyList) {
                        if (k != null && !"".equals(k)) {
                            jedis.setex(k, CoreConfig.REDIS_EXPIRE_DURATION, minUuid);
                            jedis.hset(minUuid, k, jsonObject.toJSONString());
                        } else {
                            // Correlation-field value is empty.
                            jedis.hset(minUuid, "emptyValue", jsonObject.toJSONString());
                        }
                        jedis.expire(minUuid, CoreConfig.REDIS_EXPIRE_DURATION);
                    }
                } else {
                    // No correlation field at all: mark the log and store it under a
                    // placeholder field so it still reaches ES.
                    jsonObject.put("legitimateMark", 8);
                    jedis.hset(minUuid, "000", jsonObject.toJSONString());
                    jedis.expire(minUuid, CoreConfig.REDIS_EXPIRE_DURATION);
                }
            /**
             * true: at least one correlation value already exists in Redis.
             */
            } else {
                /**
                 * 1. Take the uuid of the first existing key as minUuid.
                 * (Originally "smallest/earliest uuid"; any one works as long as
                 * all data ends up in the same hash.)
                 */
                for (String k : keyList) {
                    if (jedis.exists(k)) {
                        // Initialize minUuid with the first hit.
                        if (minUuid == null) {
                            minUuid = jedis.get(k);
                            break;//todo ?
                        }
                    }
                }
                /**
                 * 2. Merge this stage log into minUuid.
                 * 3. Then merge the hashes of all other uuids into it.
                 * Loop over every correlation value of this log:
                 */
                boolean repeatedCount = true;
                for (String k : keyList) {
                    // Does the minUuid hash already have a field for this value?
                    Boolean hexists = jedis.hexists(minUuid, k);
                    if (hexists) {
                        String oldValue = jedis.hget(minUuid, k);
                        /**
                         * If this stage already appears in the stored value, a later
                         * overwrite is coming — count that occurrence once.
                         */
                        if (repeatedCount) {
                            String now = jsonObject.getString(CoreConfig.STAGEFIELDNAME);
                            String[] split = oldValue.split("&&&");
                            for (String s : split) {
                                JSONObject j = JSONObject.parseObject(s);
                                String old = j.getString(CoreConfig.STAGEFIELDNAME);
                                if (now != null && !"".equals(now)) {
                                    if (now.equals(old)) {
                                        repeatedAccumulator.add(1);
                                        repeatedCount = false;
                                        break;
                                    }
                                }
                            }
                        }
                        /**
                         * Stage not present yet in the stored logs: prepend it.
                         */
                        if (repeatedCount) {
                            jedis.hset(minUuid, k, jsonObject.toJSONString() + "&&&" + oldValue);
                        }
                    } else {
                        jedis.hset(minUuid, k, jsonObject.toJSONString());
                    }
                    /**
                     * If this correlation value maps to some other uuid, merge that
                     * uuid's hash into minUuid and schedule the old uuid for ES deletion.
                     */
                    if (jedis.exists(k)) {
                        String uuid = jedis.get(k);
                        if (!uuid.equals(minUuid)) {
                            // All fields of the other hash.
                            Set<String> hkeys = jedis.hkeys(uuid);
                            for (String kk : hkeys) {
                                // Field already present in minUuid? then concatenate.
                                Boolean minUuidExists = jedis.hexists(minUuid, kk);
                                if (minUuidExists) {
                                    String oldValue = jedis.hget(minUuid, kk);
                                    jedis.hset(minUuid, kk, jedis.hget(uuid, kk) + "&&&" + oldValue);
                                } else {
                                    jedis.hset(minUuid, kk, jedis.hget(uuid, kk));
                                }
                            }
                            dropUuidInEs.add(uuid);
                        }
                    }
                }
                /**
                 * 3. minUuid now holds the most complete data: (re)point every
                 * correlation value at it.
                 * todo should the TTL be reset here?
                 */
                Set<String> hkeys = jedis.hkeys(minUuid);
                for (String key : hkeys) {
                    jedis.setex(key, CoreConfig.REDIS_EXPIRE_DURATION, minUuid);
                }
                /**
                 * Reset the hash TTL as well.
                 */
                jedis.expire(minUuid, CoreConfig.REDIS_EXPIRE_DURATION);
            }
            list.add(new Tuple2<String, Set<String>>(minUuid, dropUuidInEs));
        }
        jedis.close();
        return list.iterator();
    }

    /**
     * Filter the stage-relation table down to rows whose {@code fieldNameOfStage}
     * column equals {@code stageName}.
     */
    private List<Row> getRelativedFieldOfTheStage(List<Row> stageRelativeTable, String fieldNameOfStage, String stageName) {
        List<Row> list = new ArrayList<>();
        for (int i = 0; i < stageRelativeTable.size(); i++) {
            String ssn = stageRelativeTable.get(i).getAs(fieldNameOfStage).toString();
            if (ssn.equals(stageName)) {
                list.add(stageRelativeTable.get(i));
            }
        }
        return list;
    }
});
/**
 * Different records in one batch may resolve to the same minUuid; merge their
 * drop-uuid sets per key (set union) so each minUuid is processed once.
 */
JavaPairDStream<String, Set<String>> stringSetJavaPairDStream = busUuidAndDropUuid.reduceByKey(
        new Function2<Set<String>, Set<String>, Set<String>>() {
            @Override
            public Set<String> call(Set<String> left, Set<String> right) throws Exception {
                left.addAll(right);
                return left;
            }
        });
/**
 * Stitching step 2: materialize each Redis hash (uuid -> {correlation value ->
 * "&&&"-joined stage logs}) into a Map&lt;stageName, stage-log json&gt;.
 * Emits Tuple3&lt;uuid, stage map, set of uuids to delete from ES&gt;.
 */
JavaDStream<Tuple3<String, Map<String, JSONObject>, Set<String>>> tuple3JavaDStream = stringSetJavaPairDStream.mapPartitions(new FlatMapFunction<Iterator<Tuple2<String, Set<String>>>, Tuple3<String, Map<String, JSONObject>, Set<String>>>() {
    @Override
    public Iterator<Tuple3<String, Map<String, JSONObject>, Set<String>>> call(Iterator<Tuple2<String, Set<String>>> tuple2Iterator) throws Exception {
        // Borrow one pooled Redis connection per partition.
        // NOTE(review): if the pool is unreachable, jedis stays null and
        // jedis.hkeys(...) below throws an NPE — confirm intended handling.
        Jedis jedis = null;
        try {
            JavaRedisClient.makePool(CoreConfig.REDIS_IP, CoreConfig.REDIS_PORT, CoreConfig.REDIS_TIMEOUT, CoreConfig.REDIS_MAXTOTAL, CoreConfig.REDIS_MAXIDLE, CoreConfig.REDIS_MINIDLE);
            jedis = JavaRedisClient.getPool().getResource();
        } catch (Exception e) {
            log.warn(TimeUtil.formattedCurrentTime() + " redis连接异常");
        }
        List<Tuple3<String, Map<String, JSONObject>, Set<String>>> list = new ArrayList<>();
        while (tuple2Iterator.hasNext()) {
            Tuple2<String, Set<String>> t2 = tuple2Iterator.next();
            String uuid = t2._1();
            Map<String, JSONObject> stageMap = new HashMap<>();
            // All fields of the hash stored under this uuid.
            Set<String> keysInUuid = jedis.hkeys(uuid);
            for (String key : keysInUuid) {
                String stageLogs = jedis.hget(uuid, key);
                String[] stageLog = stageLogs.split("&&&");
                for (int i = 0; i < stageLog.length; i++) {
                    JSONObject jsonStageLog = JSONObject.parseObject(stageLog[i]);
                    String stageFlag = jsonStageLog.getString(CoreConfig.STAGEFIELDNAME);
                    // Later entries for the same stage overwrite earlier ones.
                    stageMap.put(stageFlag, jsonStageLog);
                }
            }
            /**
             * How can the map come out empty?
             * 1. The stage name configured in the model does not match the name
             *    carried by the log.
             * 2. The log carried no correlation field (already handled: stored directly).
             */
            //log.warn(uuid + "-------------->>>>>>>>>>>" + stageMap.toString());
            list.add(new Tuple3<>(uuid, stageMap, t2._2()));
        }
        jedis.close();
        return list.iterator();
    }
});
/**
 * Per stitched transaction (uuid -> stage map):
 * 1. keep only the model-configured fields of each stage log;
 * 2. validate each stage log and tag it with a check mark;
 * 3. with all stage logs of one transaction in the map, derive the
 *    business-level fields (type, start/end times, cost, timeout, status).
 * Emits &lt;business json, set of superseded uuids to delete from ES&gt;.
 *
 * Fixes in this revision:
 * - null checks now run BEFORE .equals() (the originals dereferenced first:
 *   {@code code.equals(codeM) && code != null} etc.);
 * - the "no start time found" sentinel is compared against Long.MAX_VALUE
 *   (it was compared against Integer.MAX_VALUE, so an unmatched transaction
 *   wrote Long.MAX_VALUE instead of 0 into busMinStartTime);
 * - cost rule 0 guards on the rule-local maxTemp instead of the global
 *   maxEndTime, which could let a negative cost through;
 * - raw HashSet replaced with diamond HashSet&lt;&gt;.
 */
JavaDStream<Tuple2<JSONObject, Set<String>>> busJsonAndDropUuidInEs = tuple3JavaDStream.mapPartitions(new FlatMapFunction<Iterator<Tuple3<String, Map<String, JSONObject>, Set<String>>>, Tuple2<JSONObject, Set<String>>>() {
    @Override
    public Iterator<Tuple2<JSONObject, Set<String>>> call(Iterator<Tuple3<String, Map<String, JSONObject>, Set<String>>> tuple3Iterator) throws Exception {
        List<Row> busStage = busStageBroadCast.value();
        List<Row> busStageField = busStageFieldBroadCast.value();
        /**
         * Build stageId -> stageName and stageName -> following stage(s)
         * lookups from the broadcast stage table.
         */
        List<Row> stageList = stageBroadCast.value();
        Map<String, String> stageIdToName = new HashMap<>();
        Map<String, String> stageToBackStage = new HashMap<>();
        for (int i = 0; i < stageList.size(); i++) {
            String stageId = stageList.get(i).getAs("stageId").toString();
            String stageName = stageList.get(i).getAs("stageName").toString();
            stageIdToName.put(stageId, stageName);
            // May hold several comma-separated stages when the flow branches.
            String backStage = null;
            try {
                backStage = stageList.get(i).getAs("backStage").toString();
            } catch (Exception e) {
                // backStage is optional in the model; keep null when absent.
            }
            stageToBackStage.put(stageName, backStage);
        }
        /**
         * Business-type, cost-time and status rules from the model.
         */
        List<Row> codeBusType = codeBusTypeBroadCast.value();
        List<Row> busBusTime = busBusTimeBroadCast.value();
        List<Row> busBusStatus = busBusStatusBroadCast.value();
        // Borrow one pooled Redis connection per partition (thresholds live in Redis).
        Jedis jedis = null;
        try {
            JavaRedisClient.makePool(CoreConfig.REDIS_IP, CoreConfig.REDIS_PORT, CoreConfig.REDIS_TIMEOUT, CoreConfig.REDIS_MAXTOTAL, CoreConfig.REDIS_MAXIDLE, CoreConfig.REDIS_MINIDLE);
            jedis = JavaRedisClient.getPool().getResource();
        } catch (Exception e) {
            log.warn(TimeUtil.formattedCurrentTime() + " redis连接异常");
        }
        List<Tuple2<JSONObject, Set<String>>> list = new ArrayList<>();
        while (tuple3Iterator.hasNext()) {
            Tuple3<String, Map<String, JSONObject>, Set<String>> t3 = tuple3Iterator.next();
            /**
             * All derived business fields accumulate in this json, which is
             * later written to ES.
             */
            JSONObject busJson = new JSONObject();
            busJson.put("busUuid", t3._1());
            Map<String, JSONObject> busMap = t3._2();
            Set<String> stages = busMap.keySet();
            /**
             * Identify the business type first; analysis only runs once it is known.
             */
            for (int i = 0; i < codeBusType.size(); i++) {
                String codeM = codeBusType.get(i).getAs("code").toString();
                String busNameM = codeBusType.get(i).getAs("busName").toString();
                String stageNameM = codeBusType.get(i).getAs("stageName").toString();
                String fieldNameM = codeBusType.get(i).getAs("fieldName").toString();
                if (busMap.containsKey(stageNameM)) {
                    JSONObject temp = busMap.get(stageNameM);
                    if (temp.containsKey(fieldNameM)) {
                        String code = temp.getString(fieldNameM);
                        // Fix: null-check before dereference (was code.equals(codeM) && code != null).
                        if (code != null && code.equals(codeM)) {
                            busJson.put("busName", busNameM);
                            break;
                        }
                    } else {
                        log.warn(stageNameM + "环节未携带" + fieldNameM + "字段");
                    }
                }
            }
            /**
             * Analysis runs only for recognized business types.
             */
            if (busJson.containsKey("busName")) {
                // The recognized business identifier.
                String busFlag = busJson.getString("busName");
                /**
                 * Track the earliest start / latest end over all stages.
                 */
                long minStartTime = Long.MAX_VALUE;
                long maxEndTime = 0L;
                String minStartTimeStage = null;
                String maxEndTimeStage = null;
                Set<String> getAllStages = new MyHashSet<>();// all stage names, serialized into one field
                for (String stage : stages) {
                    getAllStages.add(stage);
                    JSONObject oldJsonLog = busMap.get(stage);// this stage's log
                    /**
                     * 1. Keep only the fields the model marks as "keep".
                     * 2. Keys get a "<stage>_" prefix so fields of different
                     *    stages cannot collide in ES.
                     */
                    JSONObject jsonStageLog = LogOfBOMCUtil.getKeepFieldJson(oldJsonLog, busFlag, stage, busStageField);
                    /**
                     * checkTimeStamp was added by this job and is not in the
                     * model, so carry it over explicitly.
                     */
                    jsonStageLog.put(stage + "_" + "checkTimeStamp", oldJsonLog.getLong("checkTimeStamp"));
                    /**
                     * Run the validation rules and tag the result.
                     */
                    int sFlag = LogOfBOMCUtil.isLegitimateLog(jsonStageLog, busFlag, stage, busStage, busStageField);
                    jsonStageLog.put(stage + "_" + "legitimateMark", sFlag);
                    // Update the map with the cleaned json.
                    busMap.put(stage, jsonStageLog);
                    /**
                     * Earliest start time over all stages.
                     */
                    String startTimeKey = stage + "_" + "startTime";
                    if (jsonStageLog.containsKey(startTimeKey)) {
                        long min = StringUtil.analysisTimeKey(jsonStageLog, startTimeKey);
                        if (min > 0) {
                            minStartTime = min < minStartTime ? min : minStartTime;
                            if (minStartTime == min) {
                                minStartTimeStage = stage;
                            }
                        }
                    }
                    String endTimeKey = stage + "_" + "endTime";
                    if (jsonStageLog.containsKey(endTimeKey)) {
                        long max = StringUtil.analysisTimeKey(jsonStageLog, endTimeKey);
                        if (max > 0) {
                            maxEndTime = max > maxEndTime ? max : maxEndTime;
                            if (maxEndTime == max) {
                                maxEndTimeStage = stage;
                            }
                        }
                    }
                    /**
                     * Stage timeout: compare against the per-stage threshold
                     * kept in Redis. 1 = timed out, 0 = in time.
                     */
                    String costTimeKey = stage + "_" + "costTime";
                    if (jsonStageLog.containsKey(costTimeKey)) {
                        long stageCostTime = StringUtil.analysisCostTimeKey(jsonStageLog, costTimeKey);
                        if (jedis.hexists(busFlag + "Threshold", stage + "Threshold")) {
                            String sct = jedis.hget(busFlag + "Threshold", stage + "Threshold");
                            long stageCostTimeThreshold = Long.parseLong(sct);
                            if (stageCostTime >= stageCostTimeThreshold) {
                                jsonStageLog.put(stage + "_" + "timeOut", 1);
                            } else {
                                jsonStageLog.put(stage + "_" + "timeOut", 0);
                            }
                            busMap.put(stage, jsonStageLog);// refresh the map entry
                        } else {
                            log.warn("没有在redis里找到环节的超时阀值设置");
                        }
                    }
                    /**
                     * =========================================================
                     * Stage success/failure: use the status carried by the log
                     * when present; otherwise infer it — a stage is considered
                     * successful when any of its follow-up stages produced a log.
                     * The last stage passed cleaning, so it counts as successful.
                     */
                    String statusKey = stage + "_" + "status";
                    if (!jsonStageLog.containsKey(statusKey)) {
                        String backStage = stageToBackStage.get(stage);
                        if (!"".equals(backStage) && backStage != null) {
                            String[] backStageArray = backStage.split(",");
                            for (int i = 0; i < backStageArray.length; i++) {
                                if (busMap.containsKey(backStageArray[i])) {
                                    // status: 1 = success, 0 = failure.
                                    jsonStageLog.put(statusKey, 1);
                                    break;
                                } else {
                                    jsonStageLog.put(statusKey, 0);//todo possibly fragile for multi-branch flows
                                }
                            }
                        } else {
                            // No follow-up stage configured: success unless validation mark is 4.
                            if (jsonStageLog.getInteger(stage + "_" + "legitimateMark") != 4) {
                                jsonStageLog.put(statusKey, 1);
                            } else {
                                jsonStageLog.put(statusKey, 0);
                            }
                        }
                        busMap.put(stage, jsonStageLog);// refresh the map entry
                    }
                }
                // All stitched stage names, serialized into one field.
                busJson.put(CoreConfig.STAGEFIELDNAME, getAllStages.toString());
                /**
                 * =============================================================
                 * Business start/end time and start/end stage. With an
                 * incomplete transaction these are the first/last STITCHED
                 * stages, not necessarily the true first/last stages.
                 * Fix: the sentinel is Long.MAX_VALUE (was compared against
                 * Integer.MAX_VALUE, leaking Long.MAX_VALUE into the output).
                 */
                if (minStartTime == Long.MAX_VALUE) {
                    busJson.put("busMinStartTime", 0);
                } else {
                    busJson.put("busMinStartTime", minStartTime);
                }
                busJson.put("busMaxEndTime", maxEndTime);
                busJson.put("minStartTimeStage", minStartTimeStage);
                busJson.put("maxEndTimeStage", maxEndTimeStage);
                /**
                 * =============================================================
                 * Business cost time:
                 * costTimeRule 0: maxEndTime - minStartTime over the configured stages;
                 * costTimeRule 1: sum of the configured stages' cost times.
                 */
                List<Row> costTimeRows = new ArrayList<>();
                for (int i = 0; i < busBusTime.size(); i++) {
                    String bn = busBusTime.get(i).getAs("busName").toString();
                    // Fix: null-check before dereference.
                    if (busFlag != null && bn.equals(busFlag)) {
                        costTimeRows.add(busBusTime.get(i));
                    }
                }
                if (costTimeRows.size() > 0) {
                    for (int i = 0; i < costTimeRows.size(); i++) {
                        int costTimeRule = costTimeRows.get(i).getAs("costTimeRule");
                        String stageIds = costTimeRows.get(i).getAs("stageIds").toString();
                        String keyInDb = costTimeRows.get(i).getAs("keyInDb").toString();
                        String[] stageIdsArray = stageIds.split(",");
                        Set<String> stageNameSet = new HashSet<>();
                        for (int k = 0; k < stageIdsArray.length; k++) {
                            stageNameSet.add(stageIdToName.get(stageIdsArray[k].trim()));
                        }
                        // The rule only applies when every configured stage was stitched.
                        if (stages.containsAll(stageNameSet)) {
                            long minTemp = Long.MAX_VALUE;
                            long maxTemp = 0L;
                            long busCostTimeTotal = 0L;
                            for (String stage : stageNameSet) {
                                JSONObject jsonS1 = busMap.get(stage);
                                String startTimeKey = stage + "_" + "startTime";
                                if (jsonS1.containsKey(startTimeKey)) {
                                    long min = StringUtil.analysisTimeKey(jsonS1, startTimeKey);
                                    if (min > 0) {
                                        minTemp = min < minTemp ? min : minTemp;
                                    }
                                }
                                String endTimeKey = stage + "_" + "endTime";
                                if (jsonS1.containsKey(endTimeKey)) {
                                    long max = StringUtil.analysisTimeKey(jsonS1, endTimeKey);
                                    if (max > 0) {
                                        maxTemp = max > maxTemp ? max : maxTemp;
                                    }
                                }
                                String costTimeKey = stage + "_" + "costTime";
                                if (jsonS1.containsKey(costTimeKey)) {
                                    long stageCostTime = StringUtil.analysisCostTimeKey(jsonS1, costTimeKey);
                                    busCostTimeTotal += stageCostTime;
                                }
                            }
                            // Rule 0: span of the configured stages.
                            if (costTimeRule == 0) {
                                // Fix: guard on the rule-local maxTemp (was the global
                                // maxEndTime), so a missing end time cannot yield a
                                // negative cost.
                                if (minTemp == Long.MAX_VALUE || maxTemp == 0) {
                                    busJson.put(keyInDb, 0);
                                } else {
                                    busJson.put(keyInDb, maxTemp - minTemp);
                                }
                            // Rule 1: sum of the configured stages' costs.
                            } else if (costTimeRule == 1) {
                                busJson.put(keyInDb, busCostTimeTotal);
                            } else {
                                log.warn("非法的业务耗时计算规则,请检查数据模型!");
                            }
                            /**
                             * =====================================================
                             * Business timeout: compare against the threshold kept
                             * in Redis. 1 = timed out, 0 = in time.
                             */
                            if (busJson.containsKey(keyInDb)) {
                                long busCostTime = busJson.getLong(keyInDb);
                                if (jedis.hexists(busFlag + "Threshold", keyInDb + "Threshold")) {
                                    String bct = jedis.hget(busFlag + "Threshold", keyInDb + "Threshold");
                                    long costTimeThreshhold = Long.parseLong(bct);
                                    if (busCostTime >= costTimeThreshhold) {
                                        busJson.put(keyInDb + "_" + "timeOut", 1);
                                    } else {
                                        busJson.put(keyInDb + "_" + "timeOut", 0);
                                    }
                                } else {
                                    log.warn("没有在redis里找到业务的超时阀值设置");
                                }
                            }
                        }
                    }
                } else {
                    log.warn("没有读取到业务耗时的配置规则!" + busFlag);
                }
                /**
                 * =============================================================
                 * Business success/failure, driven by the model's status rules.
                 */
                List<Row> statusRows = new ArrayList<>();
                for (int i = 0; i < busBusStatus.size(); i++) {
                    String busCode = busBusStatus.get(i).getAs("busName").toString();
                    // Fix: null-check before dereference.
                    if (busFlag != null && busCode.equals(busFlag)) {
                        statusRows.add(busBusStatus.get(i));
                    }
                }
                if (statusRows.size() > 0) {
                    for (int i = 0; i < statusRows.size(); i++) {
                        String stageIds = statusRows.get(i).getAs("stageIds").toString();
                        String keyInDb = statusRows.get(i).getAs("keyInDb").toString();
                        String[] stageIdsArray = stageIds.split(",");
                        Set<String> stageNameSet = new HashSet<>();
                        for (int k = 0; k < stageIdsArray.length; k++) {
                            stageNameSet.add(stageIdToName.get(stageIdsArray[k].trim()));
                        }
                        // Failure first: complete or not, one failed participating
                        // stage makes the business fail immediately.
                        boolean busStatusFlag1 = false;
                        for (String s : stageNameSet) {
                            if (stages.contains(s)) {
                                JSONObject j = busMap.get(s);
                                int status = j.getInteger(s + "_" + "status");
                                if (status == 0) {
                                    busJson.put(keyInDb, 0);// failed, regardless of completeness
                                    busStatusFlag1 = false;
                                    break;
                                } else {
                                    busStatusFlag1 = true;
                                }
                            }
                        }
                        if (busStatusFlag1) {
                            if (!busJson.containsKey(keyInDb)) {
                                busJson.put(keyInDb, -1);// incomplete but no failure seen
                            }
                        }
                        // Success: every configured stage present and none failed;
                        // any one satisfied rule means success.
                        boolean busStatusFlag2 = false;
                        if (stages.containsAll(stageNameSet)) {
                            for (String s : stageNameSet) {
                                JSONObject j = busMap.get(s);
                                int status = j.getInteger(s + "_" + "status");
                                if (status == 0) {
                                    busStatusFlag2 = false;
                                    break;
                                } else {
                                    busStatusFlag2 = true;
                                }
                            }
                            if (busStatusFlag2 && busJson.getInteger(keyInDb) != 0) {
                                busJson.put(keyInDb, 1);// complete and no failure: success
                            }
                        }
                    }
                } else {
                    log.warn("没有读取到业务状态的配置规则!" + busFlag);
                }
                // Flatten every stage's (prefixed) fields into the business json.
                for (String stage : stages) {
                    busJson.putAll(busMap.get(stage));
                }
            } else {
                /**
                 * Data that could not be recognized as a business: prefix and
                 * flatten the raw stage fields.
                 * todo could this share the prefixing code above?
                 */
                Set<String> getAllStagesb = new MyHashSet<>();// all stage names, serialized into one field
                for (String stage : stages) {
                    getAllStagesb.add(stage);
                    JSONObject js = busMap.get(stage);
                    Set<String> keys = js.keySet();
                    keys.forEach(key -> {
                        busJson.put(stage + "_" + key, js.get(key));
                        if ("costTime".equals(key)) {
                            long ct = StringUtil.analysisCostTimeKey(js, key);
                            busJson.put(stage + "_" + key, ct);
                        }
                    });
                }
                // All stitched stage names, serialized into one field.
                busJson.put(CoreConfig.STAGEFIELDNAME, getAllStagesb.toString());
            }
            /**
             * Business analysis done (flat, non-nested output form).
             */
            list.add(new Tuple2<>(busJson, t3._3()));
        }
        jedis.close();
        return list.iterator();
    }
});
/**
 * 1. Write the stitched business-log documents to ES;
 * 2. Delete the previously written, incomplete business-log documents from ES;
 * 3. The es-hadoop connector is not used because it does not support document deletion;
 * 4. "transport client is closed" errors were resolved by NOT closing the shared client (see note at the end).
 */
busJsonAndDropUuidInEs.foreachRDD(new VoidFunction<JavaRDD<Tuple2<JSONObject, Set<String>>>>() {
    @Override
    public void call(JavaRDD<Tuple2<JSONObject, Set<String>>> tuple2JavaRDD) throws Exception {
        tuple2JavaRDD.foreachPartition(new VoidFunction<Iterator<Tuple2<JSONObject, Set<String>>>>() {
            @Override
            public void call(Iterator<Tuple2<JSONObject, Set<String>>> tuple2Iterator) throws Exception {
                // One shared ES client per executor; one bulk builder per partition.
                TransportClient client1 = ElasticSearchUtils.getTransportClient();
                BulkRequestBuilder bulkRequestBuilder1 = client1.prepareBulk();
                while (tuple2Iterator.hasNext()) {
                    Tuple2<JSONObject, Set<String>> tuple2 = tuple2Iterator.next();
                    String index = CoreConfig.BUSLOG_INDEXNAME + TimeUtil.getTableName(CoreConfig.BUSLOG_INTERVAL);
                    /**
                     * step1: delete the previously indexed, incomplete documents from ES.
                     * BUGFIX: a fresh DeleteRequestBuilder is created per id. The old code created
                     * ONE builder and mutated/re-added it in the loop, so every queued delete
                     * shared the same underlying request and pointed at the last id that was set.
                     */
                    for (String dropUuid : tuple2._2()) {
                        bulkRequestBuilder1.add(client1.prepareDelete(index, "/all", dropUuid));
                        if (bulkRequestBuilder1.numberOfActions() >= CoreConfig.ES_BULKSIZE) {
                            submit(bulkRequestBuilder1);
                            // BUGFIX: recreate the builder after each flush — re-executing an
                            // already-executed BulkRequestBuilder resubmits all of its actions.
                            bulkRequestBuilder1 = client1.prepareBulk();
                        }
                    }
                    /**
                     * step2: index the newly stitched document, keyed by its busUuid.
                     */
                    IndexRequestBuilder doc = client1.prepareIndex(index, "/all", tuple2._1().getString("busUuid")).setSource(tuple2._1());
                    bulkRequestBuilder1.add(doc);
                    if (bulkRequestBuilder1.numberOfActions() >= CoreConfig.ES_BULKSIZE) {
                        submit(bulkRequestBuilder1);
                        bulkRequestBuilder1 = client1.prepareBulk();
                    }
                }
                /**
                 * Flush the remaining (< ES_BULKSIZE) queued delete/index actions.
                 */
                if (bulkRequestBuilder1.numberOfActions() > 0) {
                    submit(bulkRequestBuilder1);
                }
                // Deliberately NOT closed: the client is shared and closing it here caused
                // "transport client is closed" failures on subsequent batches.
                //client1.close();
            }

            /**
             * Executes one bulk request synchronously and logs failures.
             * Does not reset the builder — callers recreate it after a flush.
             */
            private void submit(BulkRequestBuilder bulk) {
                BulkResponse bulkResponse = bulk.execute().actionGet();
                log.warn(TimeUtil.formattedCurrentTime() + " 向es提交了 " + bulk.numberOfActions() + " 条操作");
                if (bulkResponse.hasFailures()) {
                    log.warn("error in doing index request: " + bulkResponse.buildFailureMessage());
                }
            }
        });
    }
});
/**
 * Commit the consumed Kafka offsets to ZooKeeper, once per batch.
 * BUGFIX/perf: offsets are now written once, on the driver. The old code wrote them inside
 * foreachPartition, so EVERY partition task wrote the offsets of ALL partitions (N^2 redundant
 * ZK writes per batch) and leaked the ZkClient whenever a write threw. This output operation
 * is registered after the ES writer, so offsets are committed only after the batch's data has
 * been processed (at-least-once semantics preserved).
 */
kafkaLogs.foreachRDD(new VoidFunction<JavaRDD<ConsumerRecord<String, String>>>() {
    @Override
    public void call(JavaRDD<ConsumerRecord<String, String>> consumerRecordJavaRDD) throws Exception {
        final OffsetRange[] offsets = ((HasOffsetRanges) consumerRecordJavaRDD.rdd()).offsetRanges();
        ZkClient zkClient = new ZkClient(CoreConfig.ZK_SERVERS, CoreConfig.ZK_SESSION_TIMEOUT,
                CoreConfig.ZK_CONNECTION_TIMEOUT, new MyZkSerializer());
        try {
            for (OffsetRange o : offsets) {
                // One znode per partition: <zkTopicPath>/<partition> holds the next offset to read.
                String zkPath = zkTopicPath + "/" + o.partition();
                KafkaOffsetUtil.createZkNode(zkClient, zkPath);
                zkClient.writeData(zkPath, o.untilOffset());
                // log.info("Update offset data: partition[" + o.partition() + "], offset= " + o.untilOffset());
            }
        } finally {
            // Always release the ZK connection, even if a write fails.
            zkClient.close();
        }
    }
});
// Log batch statistics every 2 completed batches, then run the streaming job.
jsc.addStreamingListener(new StatsReportListener(2));
jsc.start();
try {
    // Negative timeout => wait indefinitely for termination (same as awaitTermination()).
    jsc.awaitTerminationOrTimeout(-1);
} catch (InterruptedException e) {
    // BUGFIX: log through SLF4J instead of printStackTrace, and restore the interrupt
    // flag so callers further up the stack can still observe the interruption.
    log.warn("interrupted while awaiting streaming context termination", e);
    Thread.currentThread().interrupt();
}
// Stop the SparkContext too (first true) and drain in-flight batches gracefully (second true).
jsc.stop(true, true);
}
}