🔍 统一日志与链路追踪 Sleuth + Zipkin 实践
📋 目录
- 🎯 一、分布式追踪核心概念
- 🔍 二、Sleuth 自动埋点机制
- 🔄 三、Trace 上下文传播原理
- 📊 四、Zipkin 架构与数据流
- ⚡ 五、消息中间件集成
- 🆚 六、SkyWalking vs Jaeger 对比
- 💡 七、生产环境最佳实践
🎯 一、分布式追踪核心概念
💡 分布式追踪的基本概念
调用链追踪核心元素:
📊 追踪数据模型
Span 数据结构定义:
/**
 * Span data model — a single unit of work in a distributed trace.
 *
 * <p>Carries identity (traceId / spanId / parentSpanId), timing, endpoint and
 * business metadata. Instances are created through the Lombok-generated
 * builder and mutated in place while the unit of work is active.
 *
 * <p>Fix: the rest of this article calls {@code Span.builder().sampled(...)},
 * {@code span.isSampled()}, {@code span.tag(...)}, {@code span.annotation(...)}
 * and fluent {@code name/kind/timestamp/localEndpoint} chains — none of which
 * existed on the original class. They are added here, backward-compatibly.
 */
@Data
@Builder
@AllArgsConstructor
public class Span {

    // --- Identity ---
    private String traceId;       // globally unique trace id, shared by all spans of one request
    private String spanId;        // id of this unit of work
    private String parentSpanId;  // parent span id for building the call tree (null for a root span)
    private String name;          // human-readable operation name

    // --- Timing ---
    private long timestamp;       // start time, epoch milliseconds (set via System.currentTimeMillis())
    private long duration;        // elapsed time in milliseconds; reporters convert to microseconds

    // --- Context ---
    private Kind kind;            // CLIENT, SERVER, PRODUCER or CONSUMER
    private boolean sampled;      // whether this span is recorded/reported (checked throughout the pipeline)
    private boolean shared;       // true when client and server report the same span id
    private boolean debug;        // force-trace / debug flag

    // --- Endpoints ---
    private Endpoint localEndpoint;   // the service emitting this span
    private Endpoint remoteEndpoint;  // the peer service, when known

    // --- Annotations and tags ---
    private List<Annotation> annotations; // timestamped events ("sr", "ss", "cs", "cr", ...)
    private Map<String, String> tags;     // key/value business labels

    // --- Status ---
    private boolean error;        // whether an error occurred during this unit of work
    private String errorMessage;  // error detail when error == true

    /** Adds a key/value tag, lazily creating the tag map. Returns this span for chaining. */
    public Span tag(String key, String value) {
        if (tags == null) {
            tags = new java.util.LinkedHashMap<>();
        }
        tags.put(key, value);
        return this;
    }

    /** Records a timestamped annotation, lazily creating the list. Returns this span for chaining. */
    public Span annotation(Annotation annotation) {
        if (annotations == null) {
            annotations = new java.util.ArrayList<>();
        }
        annotations.add(annotation);
        return this;
    }

    /** Fluent setter for the operation name (enables tracer.nextSpan().name(...) chains). */
    public Span name(String name) {
        this.name = name;
        return this;
    }

    /** Fluent setter for the span kind. */
    public Span kind(Kind kind) {
        this.kind = kind;
        return this;
    }

    /** Fluent setter for the start timestamp (epoch milliseconds). */
    public Span timestamp(long timestamp) {
        this.timestamp = timestamp;
        return this;
    }

    /** Fluent setter for the local endpoint. */
    public Span localEndpoint(Endpoint localEndpoint) {
        this.localEndpoint = localEndpoint;
        return this;
    }

    /** Span kind — which side of an interaction this span represents. */
    public enum Kind {
        CLIENT,   // outgoing RPC call
        SERVER,   // incoming request handling
        PRODUCER, // message publish
        CONSUMER  // message consume
    }

    /** Network endpoint of a service. */
    @Data
    @AllArgsConstructor
    public static class Endpoint {
        private String serviceName; // logical service name
        private String ip;          // IP address
        private int port;           // port number
    }

    /** A timestamped event attached to a span. */
    @Data
    @AllArgsConstructor
    public static class Annotation {
        private long timestamp; // epoch milliseconds of the event
        private String value;   // event code, e.g. "sr" (server receive)
    }
}
🔍 二、Sleuth 自动埋点机制
🏗️ Sleuth 自动配置架构
Sleuth 自动埋点组件:
/**
 * Core Sleuth auto-configuration.
 *
 * <p>Wires the tracer, sampler, trace-context holder and the HTTP / async
 * instrumentation beans. Beans marked {@code @ConditionalOnMissingBean} can be
 * overridden by user-supplied definitions.
 */
@Configuration
@EnableAspectJAutoProxy
@Slf4j
public class SleuthAutoConfiguration {

    /** Default tracer, built from the configured sampler and context holder. */
    @Bean
    @ConditionalOnMissingBean
    public Tracer tracer(Sampler sampler, TraceContext traceContext) {
        return new DefaultTracer(sampler, traceContext);
    }

    /** Sampling strategy; records every trace unless overridden. */
    @Bean
    @ConditionalOnMissingBean
    public Sampler sampler() {
        return Sampler.ALWAYS_SAMPLE;
    }

    /** Holder for the per-thread current span. */
    @Bean
    @ConditionalOnMissingBean
    public TraceContext traceContext() {
        return new DefaultTraceContext();
    }

    /** Servlet filter that joins/starts traces and propagates B3 headers. */
    @Bean
    public TracingFilter tracingFilter(Tracer tracer) {
        return new TracingFilter(tracer);
    }

    /** Executor wrapper that carries the trace context across async boundaries. */
    @Bean
    @ConditionalOnMissingBean
    public TracingAsyncTaskExecutor tracingAsyncTaskExecutor(Tracer tracer) {
        return new TracingAsyncTaskExecutor(tracer);
    }

    /** Appends the tracing interceptor to every RestTemplate's interceptor chain. */
    @Bean
    public RestTemplateCustomizer restTemplateCustomizer(Tracer tracer) {
        return template -> {
            List<ClientHttpRequestInterceptor> chain =
                    new ArrayList<>(template.getInterceptors());
            chain.add(new TracingClientHttpRequestInterceptor(tracer));
            template.setInterceptors(chain);
        };
    }
}
/**
 * Default {@link Tracer} implementation.
 *
 * <p>Creates root/child spans according to the sampling decision, generates
 * B3-style 128-bit trace ids and 64-bit span ids, and reports finished spans.
 *
 * <p>Fixes over the original: {@code reportSpan} was called but never defined
 * (now a documented hook); id generation used a shared {@code java.util.Random}
 * and the external commons-codec {@code Hex} class (now a contention-free
 * stdlib implementation); the class-level {@code @Component} is removed because
 * {@code SleuthAutoConfiguration#tracer} already registers this bean and the
 * stereotype would create a second, competing instance.
 */
@Slf4j
public class DefaultTracer implements Tracer {

    private final Sampler sampler;
    private final TraceContext traceContext;

    public DefaultTracer(Sampler sampler, TraceContext traceContext) {
        this.sampler = sampler;
        this.traceContext = traceContext;
    }

    /** Returns a child of the current span when one exists, otherwise a new root span. */
    @Override
    public Span nextSpan() {
        Span currentSpan = traceContext.getCurrentSpan();
        if (currentSpan != null) {
            return createChildSpan(currentSpan);
        }
        return createRootSpan();
    }

    /** Returns a child of {@code parent}, or a new root span when parent is null. */
    @Override
    public Span nextSpan(Span parent) {
        if (parent == null) {
            return createRootSpan();
        }
        return createChildSpan(parent);
    }

    /**
     * Creates a new root span. When the sampler declines, a cheap unsampled
     * placeholder is returned so callers can still chain on it.
     */
    private Span createRootSpan() {
        if (!sampler.isSampled()) {
            return Span.builder().sampled(false).build();
        }
        return Span.builder()
                .traceId(generateTraceId())
                .spanId(generateSpanId())
                .parentSpanId(null)
                .name("root")
                .timestamp(System.currentTimeMillis())
                .kind(Span.Kind.SERVER)
                .sampled(true)
                .build();
    }

    /** Creates a child span inheriting the parent's trace id, sampling decision and endpoint. */
    private Span createChildSpan(Span parent) {
        if (!parent.isSampled()) {
            return Span.builder().sampled(false).build();
        }
        return Span.builder()
                .traceId(parent.getTraceId())
                .spanId(generateSpanId())
                .parentSpanId(parent.getSpanId())
                .name("child")
                .timestamp(System.currentTimeMillis())
                .kind(Span.Kind.CLIENT)
                .sampled(true)
                .localEndpoint(parent.getLocalEndpoint())
                .build();
    }

    /** Generates a 128-bit (16-byte) trace id as 32 lowercase hex characters. */
    private String generateTraceId() {
        return randomHex(16);
    }

    /** Generates a 64-bit (8-byte) span id as 16 lowercase hex characters. */
    private String generateSpanId() {
        return randomHex(8);
    }

    /**
     * Hex-encodes {@code byteCount} random bytes. ThreadLocalRandom avoids the
     * contention of a shared Random under concurrency; trace ids need
     * uniqueness, not cryptographic strength.
     */
    private static String randomHex(int byteCount) {
        byte[] bytes = new byte[byteCount];
        ThreadLocalRandom.current().nextBytes(bytes);
        StringBuilder sb = new StringBuilder(byteCount * 2);
        for (byte b : bytes) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }

    /** Finishes a sampled span: computes its duration (milliseconds) and reports it. */
    @Override
    public void close(Span span) {
        if (span != null && span.isSampled()) {
            span.setDuration(System.currentTimeMillis() - span.getTimestamp());
            log.debug("Span关闭: traceId={}, spanId={}, duration={}ms",
                    span.getTraceId(), span.getSpanId(), span.getDuration());
            reportSpan(span);
        }
    }

    /**
     * Reporting hook — the original called this method without ever defining
     * it. Override (or decorate) to forward the span to a SpanReporter such as
     * ZipkinSpanReporter. The default is a deliberate no-op.
     */
    protected void reportSpan(Span span) {
        // no-op by default — override to forward spans to a collector
    }
}
🔄 HTTP 请求自动追踪
HTTP 拦截器实现:
/**
 * Servlet filter that instruments every HTTP request: joins an incoming B3
 * trace (or starts a new one), tags the span with HTTP metadata, echoes the
 * trace headers on the response, and always closes/clears the span context.
 */
@Component
@Slf4j
public class TracingFilter implements Filter {

    private final Tracer tracer;

    // B3 propagation header names (Zipkin convention)
    private static final String TRACE_HEADER = "X-B3-TraceId";
    private static final String SPAN_HEADER = "X-B3-SpanId";
    private static final String PARENT_HEADER = "X-B3-ParentSpanId";
    private static final String SAMPLED_HEADER = "X-B3-Sampled";

    public TracingFilter(Tracer tracer) {
        this.tracer = tracer;
    }

    /**
     * Wraps request processing in a span lifecycle:
     * extract/create -> bind to context -> annotate -> delegate -> close/clear.
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response,
    FilterChain chain) throws IOException, ServletException {
        HttpServletRequest httpRequest = (HttpServletRequest) request;
        HttpServletResponse httpResponse = (HttpServletResponse) response;
        // 1. Join the incoming trace or start a new one
        Span span = extractOrCreateSpan(httpRequest);
        try {
            // 2. Bind the span to the current thread
            // NOTE(review): assumes Tracer exposes getTraceContext() — confirm
            // the Tracer interface actually declares it.
            tracer.getTraceContext().setCurrentSpan(span);
            // 3. Echo trace ids on the response for client-side correlation
            addTracingHeaders(httpResponse, span);
            // 4. "sr" (server receive) annotation + request tags
            logServerReceived(span, httpRequest);
            // 5. Continue the filter chain
            chain.doFilter(request, response);
            // 6. "ss" (server send) annotation + status tag
            logServerSent(span, httpResponse);
        } catch (Exception e) {
            // 7. Mark the span failed and rethrow — the container handles the error
            span.setError(true);
            span.setErrorMessage(e.getMessage());
            span.tag("error", "true");
            throw e;
        } finally {
            // 8. Close the span (computes duration, reports it)
            tracer.close(span);
            // 9. Clear the thread-local context — servlet threads are pooled
            tracer.getTraceContext().clear();
        }
    }

    /**
     * Builds a SERVER span from incoming B3 headers, or asks the tracer for a
     * fresh root span when the request carries no trace.
     */
    private Span extractOrCreateSpan(HttpServletRequest request) {
        String traceId = request.getHeader(TRACE_HEADER);
        String spanId = request.getHeader(SPAN_HEADER);
        String parentSpanId = request.getHeader(PARENT_HEADER);
        String sampled = request.getHeader(SAMPLED_HEADER);
        if (traceId != null && spanId != null) {
            // Continue the upstream trace
            // NOTE(review): buildEndpoint(...) is referenced but not defined in
            // this snippet — confirm it exists elsewhere in the project.
            return Span.builder()
            .traceId(traceId)
            .spanId(spanId)
            .parentSpanId(parentSpanId)
            .sampled("1".equals(sampled))
            .kind(Span.Kind.SERVER)
            .timestamp(System.currentTimeMillis())
            .name(request.getMethod() + " " + request.getRequestURI())
            .localEndpoint(buildEndpoint(request))
            .build();
        } else {
            // New root span.
            // NOTE(review): this chain requires fluent name/kind/localEndpoint
            // setters on Span that return Span — Lombok @Data alone generates
            // void setters, so verify Span provides these methods.
            return tracer.nextSpan()
            .name(request.getMethod() + " " + request.getRequestURI())
            .kind(Span.Kind.SERVER)
            .localEndpoint(buildEndpoint(request));
        }
    }

    /** Echoes trace identifiers on the response so callers can correlate logs. */
    private void addTracingHeaders(HttpServletResponse response, Span span) {
        if (span.isSampled()) {
            response.setHeader(TRACE_HEADER, span.getTraceId());
            response.setHeader(SPAN_HEADER, span.getSpanId());
            response.setHeader(SAMPLED_HEADER, "1");
        }
    }

    /** Records the "sr" (server receive) annotation and standard HTTP tags. */
    private void logServerReceived(Span span, HttpServletRequest request) {
        if (span.isSampled()) {
            span.annotation(new Annotation(System.currentTimeMillis(), "sr"));
            // standard HTTP request tags
            span.tag("http.method", request.getMethod());
            span.tag("http.path", request.getRequestURI());
            span.tag("http.host", request.getServerName());
            span.tag("http.user_agent", request.getHeader("User-Agent"));
            log.debug("服务器接收请求: {} {}, traceId: {}",
            request.getMethod(), request.getRequestURI(), span.getTraceId());
        }
    }

    /** Records the "ss" (server send) annotation and the response status tag. */
    private void logServerSent(Span span, HttpServletResponse response) {
        if (span.isSampled()) {
            span.annotation(new Annotation(System.currentTimeMillis(), "ss"));
            span.tag("http.status_code", String.valueOf(response.getStatus()));
            log.debug("服务器发送响应: status={}, traceId={}",
            response.getStatus(), span.getTraceId());
        }
    }
}
🔄 三、Trace 上下文传播原理
📡 上下文传播机制
Trace 上下文跨服务传播:
/**
 * Trace context manager.
 *
 * <p>Holds the current span (and optional extra baggage) in thread-locals and
 * knows how to inject/extract B3 headers for cross-service propagation.
 *
 * <p>Thread-locals must be cleared after each unit of work ({@link #clear()})
 * or pooled threads will leak stale trace state.
 */
@Component
@Slf4j
public class TraceContext {

    private final ThreadLocal<Span> currentSpan = new ThreadLocal<>();
    private final ThreadLocal<Map<String, String>> extraContext = new ThreadLocal<>();

    /** Returns the span bound to the calling thread, or null. */
    public Span getCurrentSpan() {
        return currentSpan.get();
    }

    /** Binds {@code span} to the calling thread. */
    public void setCurrentSpan(Span span) {
        currentSpan.set(span);
        if (span != null) {
            log.debug("设置当前Span: traceId={}, spanId={}",
                    span.getTraceId(), span.getSpanId());
        }
    }

    /**
     * Returns the mutable extra-baggage map for this thread, creating it on
     * first access. (The executor wrappers called this accessor but the
     * original class never defined it.)
     */
    public Map<String, String> getExtraContext() {
        Map<String, String> ctx = extraContext.get();
        if (ctx == null) {
            ctx = new HashMap<>();
            extraContext.set(ctx);
        }
        return ctx;
    }

    /** Clears both thread-locals; must run in a finally block after each request/task. */
    public void clear() {
        currentSpan.remove();
        if (extraContext.get() != null) {
            extraContext.get().clear();
        }
        extraContext.remove();
    }

    /**
     * Injects B3 trace headers (plus any custom baggage) into an outgoing HTTP
     * request. The parent header is written only when present — the original
     * unconditionally set a possibly-null value.
     */
    public void inject(HttpHeaders headers) {
        Span span = getCurrentSpan();
        if (span != null && span.isSampled()) {
            headers.set("X-B3-TraceId", span.getTraceId());
            headers.set("X-B3-SpanId", span.getSpanId());
            if (span.getParentSpanId() != null) {
                headers.set("X-B3-ParentSpanId", span.getParentSpanId());
            }
            headers.set("X-B3-Sampled", "1");
            injectCustomContext(headers);
        }
    }

    /**
     * Copies per-thread baggage entries into prefixed HTTP headers.
     * (Called but never defined in the original snippet.)
     */
    private void injectCustomContext(HttpHeaders headers) {
        Map<String, String> ctx = extraContext.get();
        if (ctx != null && !ctx.isEmpty()) {
            ctx.forEach((key, value) -> headers.set("X-Trace-Ctx-" + key, value));
        }
    }

    /** Rebuilds a span from incoming B3 headers, or returns null when absent. */
    public Span extract(HttpHeaders headers) {
        String traceId = headers.getFirst("X-B3-TraceId");
        String spanId = headers.getFirst("X-B3-SpanId");
        String parentSpanId = headers.getFirst("X-B3-ParentSpanId");
        String sampled = headers.getFirst("X-B3-Sampled");
        if (traceId != null && spanId != null) {
            return Span.builder()
                    .traceId(traceId)
                    .spanId(spanId)
                    .parentSpanId(parentSpanId)
                    .sampled("1".equals(sampled))
                    .build();
        }
        return null;
    }
}
/**
 * Propagates the trace context across thread boundaries by wrapping
 * Runnable/Callable tasks before they are handed to an executor.
 */
@Component
@Slf4j
public class TraceContextExecutor {

    private final TraceContext traceContext;

    // Constructor injection — the original declared the final field but no constructor.
    public TraceContextExecutor(TraceContext traceContext) {
        this.traceContext = traceContext;
    }

    /**
     * Wraps a Runnable so the submitting thread's span and baggage are
     * restored inside the worker thread and cleared afterwards.
     *
     * <p>NOTE(review): the captured Span object is shared (not copied) between
     * threads; avoid mutating the same span concurrently.
     */
    public Runnable wrap(Runnable task) {
        Span capturedSpan = traceContext.getCurrentSpan();
        Map<String, String> capturedContext = copyExtraContext();
        return () -> {
            try {
                // restore the submitter's context on the worker thread
                traceContext.setCurrentSpan(capturedSpan);
                if (capturedContext != null) {
                    traceContext.getExtraContext().putAll(capturedContext);
                }
                task.run();
            } finally {
                // always clear — worker threads are pooled and reused
                traceContext.clear();
            }
        };
    }

    /** Same as {@link #wrap(Runnable)} for Callable tasks. */
    public <T> Callable<T> wrap(Callable<T> task) {
        Span capturedSpan = traceContext.getCurrentSpan();
        Map<String, String> capturedContext = copyExtraContext();
        return () -> {
            try {
                traceContext.setCurrentSpan(capturedSpan);
                if (capturedContext != null) {
                    traceContext.getExtraContext().putAll(capturedContext);
                }
                return task.call();
            } finally {
                traceContext.clear();
            }
        };
    }

    /**
     * Snapshots the caller's baggage map so later mutation on either thread
     * cannot interfere. (Called but never defined in the original snippet.)
     */
    private Map<String, String> copyExtraContext() {
        Map<String, String> current = traceContext.getExtraContext();
        return (current == null || current.isEmpty()) ? null : new HashMap<>(current);
    }

    /**
     * AsyncTaskExecutor decorator that wraps every submitted task.
     *
     * <p>Fixes over the original: made static with constructor injection (the
     * final fields previously had no constructor) and adds the mandatory
     * {@code execute(Runnable)} override inherited from TaskExecutor, without
     * which the class does not compile.
     */
    @Component
    public static class TracingAsyncTaskExecutor implements AsyncTaskExecutor {

        private final AsyncTaskExecutor delegate;
        private final TraceContextExecutor contextExecutor;

        public TracingAsyncTaskExecutor(AsyncTaskExecutor delegate,
                                        TraceContextExecutor contextExecutor) {
            this.delegate = delegate;
            this.contextExecutor = contextExecutor;
        }

        @Override
        public void execute(Runnable task) {
            delegate.execute(contextExecutor.wrap(task));
        }

        @Override
        public void execute(Runnable task, long startTimeout) {
            delegate.execute(contextExecutor.wrap(task), startTimeout);
        }

        @Override
        public Future<?> submit(Runnable task) {
            return delegate.submit(contextExecutor.wrap(task));
        }

        @Override
        public <T> Future<T> submit(Callable<T> task) {
            return delegate.submit(contextExecutor.wrap(task));
        }
    }
}
🔄 Feign 客户端集成
Feign 客户端追踪集成:
/**
 * Feign request interceptor: starts a CLIENT span for each outgoing call and
 * injects B3 propagation headers.
 */
@Component
@Slf4j
public class TracingFeignInterceptor implements RequestInterceptor {

    private final Tracer tracer;
    private final TraceContext traceContext;

    // Constructor injection — the original declared final fields but no constructor.
    public TracingFeignInterceptor(Tracer tracer, TraceContext traceContext) {
        this.tracer = tracer;
        this.traceContext = traceContext;
    }

    @Override
    public void apply(RequestTemplate template) {
        Span currentSpan = traceContext.getCurrentSpan();
        if (currentSpan != null && currentSpan.isSampled()) {
            // Child CLIENT span for this outgoing request
            // (relies on Span exposing fluent name/kind/timestamp setters).
            Span clientSpan = tracer.nextSpan(currentSpan)
                    .name("feign:" + template.method() + " " + template.url())
                    .kind(Span.Kind.CLIENT)
                    .timestamp(System.currentTimeMillis());
            // "cs" = client send
            clientSpan.annotation(new Annotation(System.currentTimeMillis(), "cs"));
            // B3 propagation headers for the downstream service
            template.header("X-B3-TraceId", clientSpan.getTraceId());
            template.header("X-B3-SpanId", clientSpan.getSpanId());
            template.header("X-B3-ParentSpanId", currentSpan.getSpanId());
            template.header("X-B3-Sampled", "1");
            // business tags
            clientSpan.tag("http.method", template.method());
            clientSpan.tag("http.url", template.url());
            clientSpan.tag("component", "feign");
            // Stash the span on the template so the logger can finish it.
            // NOTE(review): RequestTemplate attribute support varies across
            // Feign versions — confirm against the Feign version in use.
            template.attribute("clientSpan", clientSpan);
            log.debug("Feign请求追踪: {} {}, traceId: {}",
                    template.method(), template.url(), clientSpan.getTraceId());
        }
    }

    /**
     * Feign logger that completes the client span when the response arrives
     * ("cr" = client receive). Intentionally a non-static inner class: it uses
     * the enclosing interceptor's {@code tracer}.
     */
    @Component
    public class TracingFeignLogger extends feign.Logger {

        @Override
        protected void log(String configKey, String format, Object... args) {
            if (log.isDebugEnabled()) {
                log.debug(format, args);
            }
        }

        @Override
        protected void logRequest(String configKey, Level logLevel, Request request) {
            // request logging only at debug level
            if (log.isDebugEnabled()) {
                super.logRequest(configKey, logLevel, request);
            }
        }

        @Override
        protected Response logAndRebufferResponse(String configKey, Level logLevel,
                Response response, long elapsedTime) throws IOException {
            // Finish the span stashed by apply()
            Request request = response.request();
            Span clientSpan = (Span) request.requestTemplate().attribute("clientSpan");
            if (clientSpan != null) {
                clientSpan.annotation(new Annotation(System.currentTimeMillis(), "cr"));
                clientSpan.tag("http.status_code", String.valueOf(response.status()));
                clientSpan.setDuration(elapsedTime * 1000); // ms -> µs
                // NOTE(review): DefaultTracer.close() recomputes duration from
                // the span timestamp and will overwrite the value set above.
                tracer.close(clientSpan);
            }
            return super.logAndRebufferResponse(configKey, logLevel, response, elapsedTime);
        }
    }
}
📊 四、Zipkin 架构与数据流
🏗️ Zipkin 系统架构
Zipkin 数据流架构:
🔧 Zipkin 集成配置
Spring Cloud Sleuth Zipkin 配置:
# application.yml — Spring Cloud Sleuth + Zipkin client configuration
spring:
  zipkin:
    # Zipkin collector address
    base-url: http://zipkin-server:9411
    # enable span reporting to Zipkin
    enabled: true
    # service name attached to reported spans
    service:
      name: user-service
    # Resolve the Zipkin host through service discovery.
    # NOTE: the actual Sleuth property is "locator" — the original used
    # "location", which is not bound by Spring Cloud Sleuth.
    locator:
      discovery:
        enabled: true
    # transport used to ship spans
    sender:
      type: web # supported: web, kafka, rabbit
    # gzip payloads before sending
    compression:
      enabled: true
    # HTTP client timeouts for the reporter
    connect-timeout: 5000
    read-timeout: 10000
  sleuth:
    # sampling rate
    sampler:
      probability: 1.0 # 1.0 = sample 100% of requests
    # HTTP request tracing
    web:
      client:
        enabled: true
      # paths excluded from tracing
      skip-pattern: /health,/info
    # async task tracing
    async:
      enabled: true
    # messaging tracing
    messaging:
      enabled: true
    # Scheduled task tracing.
    # NOTE: the actual property name is "scheduled" (original said "schedule").
    scheduled:
      enabled: true
    # Redis tracing
    redis:
      enabled: true
    # JDBC tracing
    jdbc:
      enabled: true

# Actuator exposure for tracing-related endpoints
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,zipkin
  endpoint:
    zipkin:
      enabled: true
  metrics:
    export:
      zipkin:
        enabled: true

# Log pattern surfacing trace/span ids from the MDC
logging:
  pattern:
    level: "%5p [${spring.zipkin.service.name:},%X{traceId:-},%X{spanId:-}]"
📡 Zipkin 报告器实现
Zipkin Span 报告器:
/**
 * Reports finished spans to Zipkin: converts the internal Span model into the
 * zipkin2 wire model and hands it to the sender, with Micrometer counters and
 * a timer tracking sent/dropped spans and send latency.
 */
@Component
@Slf4j
public class ZipkinSpanReporter implements SpanReporter {

    private final ZipkinRestTemplateSender sender;
    private final ObjectMapper objectMapper; // NOTE(review): injected but never used in this class
    private final MeterRegistry meterRegistry;

    // Reporting metrics
    private final Counter spansSentCounter;    // spans successfully handed to the sender
    private final Counter spansDroppedCounter; // unsampled spans plus convert/send failures
    private final Timer sendTimer;             // wall time of each send attempt

    public ZipkinSpanReporter(ZipkinRestTemplateSender sender,
    ObjectMapper objectMapper,
    MeterRegistry meterRegistry) {
        this.sender = sender;
        this.objectMapper = objectMapper;
        this.meterRegistry = meterRegistry;
        // register metrics once at construction
        this.spansSentCounter = meterRegistry.counter("zipkin.spans.sent");
        this.spansDroppedCounter = meterRegistry.counter("zipkin.spans.dropped");
        this.sendTimer = meterRegistry.timer("zipkin.send.duration");
    }

    /**
     * Converts and sends one span. Unsampled spans are counted as dropped and
     * skipped; conversion or send failures are logged but never propagated, so
     * reporting cannot break request processing.
     */
    @Override
    public void report(Span span) {
        if (!span.isSampled()) {
            spansDroppedCounter.increment();
            return;
        }
        try {
            // translate to the zipkin2 wire model
            zipkin2.Span zipkinSpan = convertToZipkinSpan(span);
            // time the network call
            sendTimer.record(() -> {
                try {
                    sender.sendSpans(Collections.singletonList(zipkinSpan));
                    spansSentCounter.increment();
                    if (log.isDebugEnabled()) {
                        log.debug("Span发送成功: traceId={}, spanId={}",
                        span.getTraceId(), span.getSpanId());
                    }
                } catch (Exception e) {
                    log.error("Span发送失败", e);
                    spansDroppedCounter.increment();
                }
            });
        } catch (Exception e) {
            log.error("Span转换失败", e);
            spansDroppedCounter.increment();
        }
    }

    /**
     * Maps the internal Span onto zipkin2.Span. Internal timestamps/durations
     * are milliseconds while Zipkin expects microseconds — hence the * 1000
     * conversions below.
     */
    private zipkin2.Span convertToZipkinSpan(Span span) {
        zipkin2.Span.Builder builder = zipkin2.Span.newBuilder()
        .traceId(span.getTraceId())
        .id(span.getSpanId())
        .name(span.getName())
        .timestamp(span.getTimestamp() * 1000) // ms -> µs
        .duration(span.getDuration() * 1000); // ms -> µs
        // parent linkage (root spans have none)
        if (span.getParentSpanId() != null) {
            builder.parentId(span.getParentSpanId());
        }
        // emitting service
        if (span.getLocalEndpoint() != null) {
            builder.localEndpoint(zipkin2.Endpoint.newBuilder()
            .serviceName(span.getLocalEndpoint().getServiceName())
            .ip(span.getLocalEndpoint().getIp())
            .port(span.getLocalEndpoint().getPort())
            .build());
        }
        // peer service, when known
        if (span.getRemoteEndpoint() != null) {
            builder.remoteEndpoint(zipkin2.Endpoint.newBuilder()
            .serviceName(span.getRemoteEndpoint().getServiceName())
            .ip(span.getRemoteEndpoint().getIp())
            .port(span.getRemoteEndpoint().getPort())
            .build());
        }
        // timestamped events ("sr", "ss", "cs", "cr", ...)
        if (span.getAnnotations() != null) {
            for (Annotation annotation : span.getAnnotations()) {
                builder.addAnnotation(annotation.getTimestamp() * 1000, annotation.getValue());
            }
        }
        // business tags
        if (span.getTags() != null) {
            for (Map.Entry<String, String> tag : span.getTags().entrySet()) {
                builder.putTag(tag.getKey(), tag.getValue());
            }
        }
        // span kind maps 1:1 onto the zipkin2 enum
        if (span.getKind() != null) {
            switch (span.getKind()) {
                case CLIENT:
                builder.kind(zipkin2.Span.Kind.CLIENT);
                break;
                case SERVER:
                builder.kind(zipkin2.Span.Kind.SERVER);
                break;
                case PRODUCER:
                builder.kind(zipkin2.Span.Kind.PRODUCER);
                break;
                case CONSUMER:
                builder.kind(zipkin2.Span.Kind.CONSUMER);
                break;
            }
        }
        // client and server reporting the same span id
        if (span.isShared()) {
            builder.shared(true);
        }
        // force-trace flag
        if (span.isDebug()) {
            builder.debug(true);
        }
        return builder.build();
    }
}
/**
* Zipkin REST API 发送器
*/
@Component
@Slf4j
public class ZipkinRestTemplateSender {
private final RestTemplate restTemplate;
private final String zipkinBaseUrl;
private final ObjectMapper objectMapper;
public ZipkinRestTemplateSender(RestTemplate restTemplate,
@Value("${spring.zipkin.base-url}") String zipkinBaseUrl,
ObjectMapper objectMapper) {
this.restTemplate = restTemplate;
this.zipkinBaseUrl = zipkinBaseUrl;
this.objectMapper = objectMapper;
}
/**
* 批量发送Span到Zipkin
*/
public void sendSpans(List<zipkin2.Span> spans) {
if (spans.isEmpty()) {
return;
}
try {
// 序列化Span列表
String jsonSpans = objectMapper.writeValueAsString(spans);
// 构建请求
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
headers.set("Content-Encoding", "gzip");
HttpEntity<byte[]> request = new HttpEntity<>(
gzipCompress(jsonSpans), headers);
// 发送到Zipkin
ResponseEntity<String> response = restTemplate.postForEntity(
zipkinBaseUrl + "/api/v2/spans", request, String.class);
if (!response.getStatusCode().is2xxSuccessful()) {
log.warn("Zipkin响应异常: {}", response.getStatusCode());
}
} catch (Exception e) {
throw new RuntimeException("发送Span到Zipkin失败", e);
}
}
/**
* GZIP压缩
*/
private byte[] gzipCompress(String data) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream(data.length());
GZIPOutputStream gzip = new GZIPOutputStream(bos);
gzip.write(data.getBytes(StandardCharsets.UTF_8));
gzip.close();
return bos.toByteArray();
}
}
⚡ 五、消息中间件集成
📨 Kafka 消息追踪
Kafka 消息追踪集成:
/**
 * Kafka tracing wiring: decorates the producer/consumer factories so trace
 * context is injected into and extracted from message headers automatically.
 */
@Configuration
@Slf4j
public class KafkaTracingConfiguration {

    // Producer side: every created producer adds B3 headers on send
    @Bean
    public TracingProducerFactory<String, String> tracingProducerFactory(
    ProducerFactory<String, String> producerFactory,
    Tracer tracer) {
        return new TracingProducerFactory<>(producerFactory, tracer);
    }

    // Consumer side: every created consumer restores trace context on poll.
    // NOTE(review): TracingConsumerFactory is referenced but not defined in this article.
    @Bean
    public TracingConsumerFactory<String, String> tracingConsumerFactory(
    ConsumerFactory<String, String> consumerFactory,
    Tracer tracer) {
        return new TracingConsumerFactory<>(consumerFactory, tracer);
    }
}
/**
 * ProducerFactory decorator: every producer it creates wraps outgoing records
 * with B3 trace-context headers.
 *
 * <p>Fixes over the original: the final fields had no constructor, an unused
 * {@code TRACE_HEADER} constant is removed, and header bytes are encoded with
 * an explicit UTF-8 charset instead of the platform default.
 */
@Component
@Slf4j
public class TracingProducerFactory<K, V> implements ProducerFactory<K, V> {

    private final ProducerFactory<K, V> delegate;
    private final Tracer tracer;

    public TracingProducerFactory(ProducerFactory<K, V> delegate, Tracer tracer) {
        this.delegate = delegate;
        this.tracer = tracer;
    }

    @Override
    public Producer<K, V> createProducer() {
        return new TracingProducer<>(delegate.createProducer(), tracer);
    }

    /**
     * Producer wrapper that injects B3 headers into each record before delegating.
     */
    private static class TracingProducer<K, V> implements Producer<K, V> {

        private final Producer<K, V> delegate;
        private final Tracer tracer;

        public TracingProducer(Producer<K, V> delegate, Tracer tracer) {
            this.delegate = delegate;
            this.tracer = tracer;
        }

        @Override
        public Future<RecordMetadata> send(ProducerRecord<K, V> record) {
            ProducerRecord<K, V> tracedRecord = injectTraceContext(record);
            return delegate.send(tracedRecord);
        }

        @Override
        public Future<RecordMetadata> send(ProducerRecord<K, V> record, Callback callback) {
            ProducerRecord<K, V> tracedRecord = injectTraceContext(record);
            return delegate.send(tracedRecord, callback);
        }

        /**
         * Creates a PRODUCER span and copies its identifiers into the record
         * headers so consumers can continue the trace.
         */
        private ProducerRecord<K, V> injectTraceContext(ProducerRecord<K, V> record) {
            // NOTE(review): assumes Tracer exposes getTraceContext() — confirm the interface.
            Span currentSpan = tracer.getTraceContext().getCurrentSpan();
            if (currentSpan != null && currentSpan.isSampled()) {
                // Child PRODUCER span (relies on fluent Span setters)
                Span producerSpan = tracer.nextSpan(currentSpan)
                        .name("kafka:produce:" + record.topic())
                        .kind(Span.Kind.PRODUCER)
                        .timestamp(System.currentTimeMillis());
                // "ms" = message send
                producerSpan.annotation(new Annotation(System.currentTimeMillis(), "ms"));
                producerSpan.tag("messaging.system", "kafka");
                producerSpan.tag("messaging.destination", record.topic());
                producerSpan.tag("messaging.destination_kind", "topic");
                // B3 propagation headers on the Kafka record
                Headers headers = record.headers();
                headers.add("X-B3-TraceId", utf8(producerSpan.getTraceId()));
                headers.add("X-B3-SpanId", utf8(producerSpan.getSpanId()));
                headers.add("X-B3-ParentSpanId", utf8(currentSpan.getSpanId()));
                headers.add("X-B3-Sampled", utf8("1"));
                // custom propagation header
                headers.add("X-Trace-Service", utf8("user-service"));
                log.debug("Kafka消息追踪 - 发送: topic={}, traceId={}",
                        record.topic(), producerSpan.getTraceId());
                // NOTE(review): the span is closed before the broker acknowledges
                // the (asynchronous) send, so its duration only covers header
                // injection — not actual delivery time.
                tracer.close(producerSpan);
            }
            return record;
        }

        /** Encodes header values with an explicit charset (platform-independent). */
        private static byte[] utf8(String value) {
            return value.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        }
    }
}
🆚 六、SkyWalking vs Jaeger 对比
📊 功能对比分析
三大追踪系统对比:
| 特性 | Spring Cloud Sleuth + Zipkin | SkyWalking | Jaeger |
|---|---|---|---|
| 架构模式 | 客户端-服务器 | 探针-收集器 | 客户端-收集器 |
| 数据存储 | ES, MySQL, Cassandra | ES, H2, MySQL, TiDB | Cassandra, ES, Kafka |
| UI界面 | Zipkin UI | SkyWalking UI | Jaeger UI |
| 语言支持 | Java为主,多语言支持 | 多语言探针 | 多语言客户端 |
| 性能开销 | 中等 | 低 | 低-中等 |
| 安装部署 | 简单 | 中等 | 简单 |
| 生态系统 | Spring Cloud生态 | Apache项目 | CNCF项目 |
| 监控维度 | 调用链、延迟 | 拓扑图、指标、追踪 | 分布式追踪 |
🔄 迁移到 SkyWalking
SkyWalking 配置示例:
# agent.config — SkyWalking Java agent configuration
# Logical service name shown in the SkyWalking UI (env override: SW_AGENT_NAME)
agent.service_name=${SW_AGENT_NAME:user-service}
# OAP collector backend address (env override: SW_AGENT_COLLECTOR)
collector.backend_service=${SW_AGENT_COLLECTOR:127.0.0.1:11800}
# Sampling: traces kept per 3-second window; a negative default means sample everything
agent.sample_n_per_3_secs=${SW_AGENT_SAMPLE:-1}
# Request suffixes excluded from tracing (static assets)
agent.ignore_suffix=${SW_AGENT_IGNORE_SUFFIX:.jpg,.jpeg,.png,.gif,.css,.js}
# Enable cross-process trace context propagation
agent.cross_process_propagation_config=${SW_AGENT_CROSS_PROPAGATION:true}
💡 七、生产环境最佳实践
🔧 生产级配置
Sleuth + Zipkin 生产配置:
spring:
  sleuth:
    # Sampling — 10% is a sensible default for production traffic volumes
    sampler:
      probability: 0.1
    # Expose trace/span ids to the logging MDC.
    # NOTE: the actual Sleuth property is "whitelisted-mdc-keys" — the
    # original used "whitelist-mdc-keys", which is not bound.
    log:
      slf4j:
        whitelisted-mdc-keys: traceId,spanId,parentSpanId
    # async tracing
    async:
      enabled: true
      configurer:
        enabled: true
    # Scheduled task tracing.
    # NOTE: the actual property name is "scheduled" (original said "schedule").
    scheduled:
      enabled: true
      skip-pattern: healthCheckTask
    # messaging tracing
    messaging:
      enabled: true
      rabbit:
        enabled: true
      kafka:
        enabled: true
  zipkin:
    # production Zipkin cluster endpoint
    base-url: http://zipkin-cluster:9411
    # sender transport
    sender:
      type: web
    # gzip payloads before sending
    compression:
      enabled: true
    # HTTP connection pool for the reporting RestTemplate
    rest-template:
      max-total-connections: 100
      max-per-route: 20
      connection-timeout: 5000
      read-timeout: 10000

# Log pattern with application name + trace/span ids
logging:
  pattern:
    level: "%5p [${spring.application.name:-},%X{traceId:-},%X{spanId:-}]"
  level:
    org.springframework.cloud.sleuth: INFO
    brave: WARN
    zipkin2: WARN

# Actuator / metrics exposure
management:
  endpoints:
    web:
      exposure:
        include: health,metrics,prometheus,sleuth
  endpoint:
    sleuth:
      enabled: true
  metrics:
    export:
      zipkin:
        enabled: true
        step: 1m
🚀 性能优化建议
高并发场景优化:
/**
 * Tracing configuration tuned for high-concurrency workloads:
 * asynchronous + batched span reporting and adaptive sampling.
 */
@Configuration
@Slf4j
public class HighPerformanceTracingConfig {

    /**
     * Asynchronous reporter so span reporting never blocks business threads.
     * NOTE(review): both reporter beans take a SpanReporter delegate; with
     * {@code @Primary} here the wiring is ambiguous/circular — qualify the
     * delegate explicitly in a real application.
     */
    @Bean
    @Primary
    public SpanReporter asyncSpanReporter(SpanReporter delegate) {
        return new AsyncSpanReporter(delegate);
    }

    /** Batching reporter — fewer, larger network calls to the collector. */
    @Bean
    public SpanReporter batchingSpanReporter(SpanReporter delegate) {
        return BatchingSpanReporter.wrap(delegate)
                .maxBatchSize(100)                     // max spans per batch
                .maxConcurrentBatches(5)               // max in-flight batches
                .batchInterval(Duration.ofSeconds(5))  // flush interval
                .build();
    }

    /**
     * Adaptive sampler: 1% baseline, capped at 50%, adjusted every 1000
     * requests. Fix: the original called a non-existent fluent builder —
     * AdaptiveSampler only defines a 3-argument constructor.
     */
    @Bean
    public Sampler adaptiveSampler() {
        return new AdaptiveSampler(0.01, 0.5, 1000);
    }

    /**
     * Sampler that nudges its probability toward a target base rate using a
     * sliding window of recent sampling decisions.
     */
    @Slf4j
    public static class AdaptiveSampler implements Sampler {

        private final double baseProbability; // target sampling rate
        private final double maxProbability;  // upper bound for the adjusted rate
        private final int windowSize;         // requests per adjustment window
        private final AtomicInteger requestCount = new AtomicInteger(0);
        private final AtomicInteger sampleCount = new AtomicInteger(0);
        private volatile double currentProbability;

        public AdaptiveSampler(double baseProbability, double maxProbability, int windowSize) {
            this.baseProbability = baseProbability;
            this.maxProbability = maxProbability;
            this.windowSize = windowSize;
            this.currentProbability = baseProbability;
        }

        @Override
        public boolean isSampled() {
            int total = requestCount.incrementAndGet();
            // re-evaluate the rate once per window
            if (total % windowSize == 0) {
                adjustSamplingRate();
            }
            // ThreadLocalRandom avoids contention on the shared Math.random() generator
            boolean sampled = ThreadLocalRandom.current().nextDouble() < currentProbability;
            if (sampled) {
                // Fix: the original never incremented this counter, so the
                // observed rate was always 0 and the probability ratcheted up
                // to maxProbability regardless of traffic.
                sampleCount.incrementAndGet();
            }
            return sampled;
        }

        /**
         * Moves the current probability toward the base rate.
         * NOTE(review): the counter reset is not atomic with the window check,
         * so under heavy concurrency a window may be slightly over- or
         * under-counted — acceptable for a sampling heuristic.
         */
        private void adjustSamplingRate() {
            int sampled = sampleCount.get();
            double actualRate = (double) sampled / windowSize;
            if (actualRate < baseProbability * 0.5) {
                currentProbability = Math.min(currentProbability * 1.2, maxProbability);
            } else if (actualRate > baseProbability * 1.5) {
                currentProbability = Math.max(currentProbability * 0.8, baseProbability);
            }
            // reset window counters
            requestCount.set(0);
            sampleCount.set(0);
            log.info("调整采样率: {} -> {}", actualRate, currentProbability);
        }
    }
}
洞察:分布式追踪是微服务可观测性的核心支柱。合理的采样策略、高效的数据收集和智能的上下文传播,是构建生产级追踪系统的关键。理解数据流和性能影响,才能在业务需求和系统开销之间找到最佳平衡。
如果觉得本文对你有帮助,请点击 👍 点赞 + ⭐ 收藏 + 💬 留言支持!
讨论话题:
- 你在生产环境中使用哪种分布式追踪方案?有什么经验教训?
- 如何设计合理的采样策略来平衡性能和可观测性?
- 在跨语言微服务架构中如何实现完整的链路追踪?
相关资源推荐:
- 📚 https://spring.io/projects/spring-cloud-sleuth
- 🔧 https://zipkin.io/
- 💻 https://github.com/example/sleuth-zipkin-demo