zipkin-dependencies离线计算拓扑图依赖
本文分析mysql存储(后续准备接入Doris直接计算依赖关系,所以分析不关注存储)
查找main方法 ZipkinDependenciesJob
case "mysql":
MySQLDependenciesJob.builder()
.logInitializer(logInitializer)
.jars(jarPath)
.day(day)
.conf(sparkConf)
.build()
.run();
break;
zipkin2.dependencies.mysql.MySQLDependenciesJob里面包含内部类Builder,分析run方法
public void run() {
//数据库信息
Map<String, String> options = new LinkedHashMap<>();
options.put("driver", org.mariadb.jdbc.Driver.class.getName()); // prevents shade from skipping
options.put("url", url);
options.put("user", user);
options.put("password", password);
// 如果 trace_id_high == 1,则跟踪使用128位traceId取代64位
// 如果 trace_id_high == 0,用64位 "select trace_id_high from zipkin_spans limit 1"
boolean hasTraceIdHigh = hasTraceIdHigh();
Function<Row, Long> rowTraceId = r -> r.getLong(hasTraceIdHigh ? 1 : 0);
long microsLower = day * 1000;
long microsUpper = (day * 1000) + TimeUnit.DAYS.toMicros(1) - 1;
//查询span信息
String fields = "s.trace_id, s.parent_id, s.id, a.a_key, a.endpoint_service_name, a.a_type";
if (hasTraceIdHigh) fields = "s.trace_id_high, " + fields;
String groupByFields = fields.replace("s.parent_id, ", "");
String linksQuery = String.format(
"select distinct %s "+
"from zipkin_spans s left outer join zipkin_annotations a on " +
" (s.trace_id = a.trace_id and s.id = a.span_id " +
" and a.a_key in ('lc', 'ca', 'cs', 'sa', 'sr', 'ma', 'ms', 'mr', 'error')) " +
"where s.start_ts between %s and %s group by %s",
fields, microsLower, microsUpper, groupByFields);
options.put("dbtable", "(" + linksQuery + ") as link_spans");
log.info("Running Dependencies job for {}: start_ts between {} and {}", dateStamp, microsLower,
microsUpper);
JavaSparkContext sc = new JavaSparkContext(conf);
List<DependencyLink> links = new SQLContext(sc).read()
.format("org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider")
.options(options)
.load()
// RDD(弹性分布式数据集)是Spark的核心抽象
.toJavaRDD()
// 按trace_id(低64位列)分组;rowTraceId是一个Function<Row, Long>,按是否存在trace_id_high选取列下标
.groupBy(rowTraceId)
// 按照RowsTo