统一日志与链路追踪 Sleuth + Zipkin 实践

🔍 统一日志与链路追踪 Sleuth + Zipkin 实践

📋 目录

  • 🎯 一、分布式追踪核心概念
  • 🔍 二、Sleuth 自动埋点机制
  • 🔄 三、Trace 上下文传播原理
  • 📊 四、Zipkin 架构与数据流
  • ⚡ 五、消息中间件集成
  • 🆚 六、SkyWalking vs Jaeger 对比
  • 💡 七、生产环境最佳实践

🎯 一、分布式追踪核心概念

💡 分布式追踪的基本概念

调用链追踪核心元素

  • Trace - 完整调用链
  • Span - 单个工作单元
  • TraceId - 全局唯一标识
  • SpanId - 当前Span标识
  • ParentId - 父Span标识
  • Annotations - 时间点事件
      ◦ Client Sent(cs)
      ◦ Server Received(sr)
      ◦ Server Sent(ss)
      ◦ Client Received(cr)
  • Tags - 业务标签
      ◦ HTTP方法
      ◦ URL路径
      ◦ 状态码
      ◦ 错误信息

📊 追踪数据模型

Span 数据结构定义

/**
 * Span data model: one unit of work inside a distributed trace.
 *
 * <p>Besides the Lombok-generated accessors and builder, this class offers
 * fluent mutators ({@link #name(String)}, {@link #kind(Kind)},
 * {@link #timestamp(long)}, {@link #localEndpoint(Endpoint)},
 * {@link #tag(String, String)}, {@link #annotation(Annotation)}) because the
 * tracer, filter, Feign and Kafka snippets chain those calls on a span
 * instance. The mutators perform no synchronization.
 */
@Data
@Builder
@AllArgsConstructor
public class Span {
    // Identity
    private String traceId;          // trace ID shared by every span of the trace
    private String spanId;           // ID of this span
    private String parentSpanId;     // parent span ID, null for a root span
    private String name;             // human-readable operation name

    // Timing
    private long timestamp;          // start time; the tracer fills it from System.currentTimeMillis()
    private long duration;           // elapsed time; DefaultTracer.close stores milliseconds and the
                                     // Zipkin reporter multiplies by 1000 to get microseconds

    // Context
    private Kind kind;               // CLIENT, SERVER, PRODUCER or CONSUMER
    private boolean shared;          // whether the span ID is shared with a remote caller
    private boolean debug;           // debug flag

    // Endpoints
    private Endpoint localEndpoint;  // the service that recorded this span
    private Endpoint remoteEndpoint; // the peer service

    // Annotations and tags
    private List<Annotation> annotations;  // timestamped events
    private Map<String, String> tags;      // key/value tags

    // Status
    private boolean error;           // whether an error occurred
    private String errorMessage;     // error description

    // Sampling decision. Every collaborator reads span.isSampled() and sets
    // builder.sampled(...); the field was missing from the model.
    private boolean sampled;

    /**
     * Sets the span name and returns this span for chaining.
     */
    public Span name(String name) {
        this.name = name;
        return this;
    }

    /**
     * Sets the span kind and returns this span for chaining.
     */
    public Span kind(Kind kind) {
        this.kind = kind;
        return this;
    }

    /**
     * Sets the start timestamp and returns this span for chaining.
     */
    public Span timestamp(long timestamp) {
        this.timestamp = timestamp;
        return this;
    }

    /**
     * Sets the local endpoint and returns this span for chaining.
     */
    public Span localEndpoint(Endpoint localEndpoint) {
        this.localEndpoint = localEndpoint;
        return this;
    }

    /**
     * Adds or replaces a tag.
     *
     * <p>A null key or value is ignored: zipkin2's span builder rejects null
     * tag values, so storing one would make the whole span unreportable.
     *
     * @return this span, for chaining
     */
    public Span tag(String key, String value) {
        if (key == null || value == null) {
            return this;
        }
        if (tags == null) {
            // The builder leaves the map null unless the caller supplied one.
            tags = new java.util.LinkedHashMap<>();
        }
        tags.put(key, value);
        return this;
    }

    /**
     * Appends a timestamped event; a null annotation is ignored.
     *
     * @return this span, for chaining
     */
    public Span annotation(Annotation annotation) {
        if (annotation == null) {
            return this;
        }
        if (annotations == null) {
            annotations = new java.util.ArrayList<>();
        }
        annotations.add(annotation);
        return this;
    }

    /**
     * Span kinds.
     */
    public enum Kind {
        CLIENT,     // outgoing call
        SERVER,     // incoming call
        PRODUCER,   // message producer
        CONSUMER    // message consumer
    }

    /**
     * Network endpoint of a service.
     */
    @Data
    @AllArgsConstructor
    public static class Endpoint {
        private String serviceName;  // service name
        private String ip;           // IP address
        private int port;            // port number
    }

    /**
     * Timestamped event attached to a span.
     */
    @Data
    @AllArgsConstructor
    public static class Annotation {
        private long timestamp;      // event time
        private String value;        // event value, e.g. "sr" or "ss"
    }
}

🔍 二、Sleuth 自动埋点机制

🏗️ Sleuth 自动配置架构

Sleuth 自动埋点组件

/**
 * Core Sleuth-style auto-configuration.
 *
 * <p>Registers the tracer, sampler and trace context, plus the servlet filter,
 * async executor and RestTemplate customizer that carry the trace context.
 */
@Configuration
@EnableAspectJAutoProxy
@Slf4j
public class SleuthAutoConfiguration {

    /** Tracer built from the sampler and the trace context beans. */
    @Bean
    @ConditionalOnMissingBean
    public Tracer tracer(Sampler sampler, TraceContext traceContext) {
        return new DefaultTracer(sampler, traceContext);
    }

    /** Default sampler: every request is sampled. */
    @Bean
    @ConditionalOnMissingBean
    public Sampler sampler() {
        return Sampler.ALWAYS_SAMPLE;
    }

    /** Default holder for the current span. */
    @Bean
    @ConditionalOnMissingBean
    public TraceContext traceContext() {
        return new DefaultTraceContext();
    }

    /**
     * Servlet filter that handles tracing headers on incoming HTTP requests.
     */
    @Bean
    public TracingFilter tracingFilter(Tracer tracer) {
        return new TracingFilter(tracer);
    }

    /**
     * Executor wrapper that supports tracing of asynchronous tasks.
     */
    @Bean
    @ConditionalOnMissingBean
    public TracingAsyncTaskExecutor tracingAsyncTaskExecutor(Tracer tracer) {
        return new TracingAsyncTaskExecutor(tracer);
    }

    /**
     * Customizer that appends the tracing interceptor to each RestTemplate.
     */
    @Bean
    public RestTemplateCustomizer restTemplateCustomizer(Tracer tracer) {
        return template -> {
            // Copy first: the existing interceptors are kept and the tracing
            // interceptor is appended after them.
            List<ClientHttpRequestInterceptor> chain = new ArrayList<>(template.getInterceptors());
            chain.add(new TracingClientHttpRequestInterceptor(tracer));
            template.setInterceptors(chain);
        };
    }
}

/**
 * Default tracer implementation.
 *
 * <p>Creates root and child spans, generates their IDs and closes spans.
 * IDs are drawn from {@link java.util.concurrent.ThreadLocalRandom} so that
 * concurrent request threads do not contend on one shared generator.
 */
@Component
@Slf4j
public class DefaultTracer implements Tracer {

    private final Sampler sampler;
    private final TraceContext traceContext;

    public DefaultTracer(Sampler sampler, TraceContext traceContext) {
        this.sampler = sampler;
        this.traceContext = traceContext;
    }

    /**
     * Returns the trace context this tracer works with. The servlet filter and
     * the Kafka producer wrapper reach the context through the tracer.
     */
    public TraceContext getTraceContext() {
        return traceContext;
    }

    /**
     * Creates a child of the span currently held by the trace context, or a
     * root span when the context holds none.
     */
    @Override
    public Span nextSpan() {
        return nextSpan(traceContext.getCurrentSpan());
    }

    /**
     * Creates a child of {@code parent}, or a root span when {@code parent}
     * is null.
     */
    @Override
    public Span nextSpan(Span parent) {
        if (parent == null) {
            return createRootSpan();
        }
        return createChildSpan(parent);
    }

    /**
     * Creates a root span, or an ID-less unsampled span when the sampler
     * rejects the request.
     */
    private Span createRootSpan() {
        if (!sampler.isSampled()) {
            return Span.builder().sampled(false).build();
        }

        return Span.builder()
            .traceId(generateTraceId())
            .spanId(generateSpanId())
            .parentSpanId(null)
            .name("root")
            .timestamp(System.currentTimeMillis())
            .kind(Span.Kind.SERVER)
            .sampled(true)
            .build();
    }

    /**
     * Creates a child span that inherits the parent's trace ID and local
     * endpoint; an unsampled parent yields an unsampled child.
     */
    private Span createChildSpan(Span parent) {
        if (!parent.isSampled()) {
            return Span.builder().sampled(false).build();
        }

        return Span.builder()
            .traceId(parent.getTraceId())
            .spanId(generateSpanId())
            .parentSpanId(parent.getSpanId())
            .name("child")
            .timestamp(System.currentTimeMillis())
            .kind(Span.Kind.CLIENT)
            .sampled(true)
            .localEndpoint(parent.getLocalEndpoint())
            .build();
    }

    /**
     * Generates a 128-bit trace ID as a hex string.
     */
    private String generateTraceId() {
        return randomHex(16);
    }

    /**
     * Generates a 64-bit span ID as a hex string.
     */
    private String generateSpanId() {
        return randomHex(8);
    }

    /**
     * Hex-encodes {@code byteCount} random bytes taken from the calling
     * thread's own generator.
     */
    private static String randomHex(int byteCount) {
        byte[] bytes = new byte[byteCount];
        java.util.concurrent.ThreadLocalRandom.current().nextBytes(bytes);
        return Hex.encodeHexString(bytes);
    }

    /**
     * Closes a sampled span: stores the elapsed milliseconds since its start
     * timestamp and hands it to the reporter. Null and unsampled spans are
     * ignored.
     */
    @Override
    public void close(Span span) {
        if (span != null && span.isSampled()) {
            span.setDuration(System.currentTimeMillis() - span.getTimestamp());
            log.debug("Span关闭: traceId={}, spanId={}, duration={}ms", 
                     span.getTraceId(), span.getSpanId(), span.getDuration());

            // NOTE(review): reportSpan is not defined in this snippet; it
            // presumably forwards the span to a SpanReporter — confirm.
            reportSpan(span);
        }
    }
}

🔄 HTTP 请求自动追踪

HTTP 拦截器实现

/**
 * Servlet filter that traces incoming HTTP requests.
 *
 * <p>For each request it joins the span described by the B3 request headers
 * (or starts a new one), publishes it in the trace context, echoes the IDs on
 * the response, records the "sr"/"ss" events and finally closes the span and
 * clears the context.
 */
@Component
@Slf4j
public class TracingFilter implements Filter {

    private static final String TRACE_HEADER = "X-B3-TraceId";
    private static final String SPAN_HEADER = "X-B3-SpanId";
    private static final String PARENT_HEADER = "X-B3-ParentSpanId";
    private static final String SAMPLED_HEADER = "X-B3-Sampled";

    private final Tracer tracer;

    public TracingFilter(Tracer tracer) {
        this.tracer = tracer;
    }

    /**
     * Wraps the rest of the filter chain in a server span.
     *
     * @throws IOException      rethrown unchanged from the chain
     * @throws ServletException rethrown unchanged from the chain
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response, 
                        FilterChain chain) throws IOException, ServletException {

        HttpServletRequest httpRequest = (HttpServletRequest) request;
        HttpServletResponse httpResponse = (HttpServletResponse) response;

        // 1. Join the caller's span or start a new one.
        Span span = extractOrCreateSpan(httpRequest);

        try {
            // 2. Make the span visible to downstream code on this thread.
            tracer.getTraceContext().setCurrentSpan(span);

            // 3. Echo the trace IDs on the response.
            addTracingHeaders(httpResponse, span);

            // 4. Record "server received".
            logServerReceived(span, httpRequest);

            // 5. Run the rest of the chain.
            chain.doFilter(request, response);

            // 6. Record "server sent".
            logServerSent(span, httpResponse);

        } catch (Exception e) {
            // 7. Mark the span as failed, then let the exception propagate.
            span.setError(true);
            span.setErrorMessage(e.getMessage());
            span.tag("error", "true");
            throw e;
        } finally {
            // 8. Close the span, 9. drop the thread-bound context.
            tracer.close(span);
            tracer.getTraceContext().clear();
        }
    }

    /**
     * Builds the server span from the B3 request headers, or asks the tracer
     * for a new span when the trace or span ID header is absent.
     */
    private Span extractOrCreateSpan(HttpServletRequest request) {
        String traceId = request.getHeader(TRACE_HEADER);
        String spanId = request.getHeader(SPAN_HEADER);
        String operation = request.getMethod() + " " + request.getRequestURI();

        if (traceId == null || spanId == null) {
            return tracer.nextSpan()
                .name(operation)
                .kind(Span.Kind.SERVER)
                .localEndpoint(buildEndpoint(request));
        }

        // NOTE(review): a missing X-B3-Sampled header is treated as "not
        // sampled" here — confirm this is intended for callers that omit it.
        String sampled = request.getHeader(SAMPLED_HEADER);
        return Span.builder()
            .traceId(traceId)
            .spanId(spanId)
            .parentSpanId(request.getHeader(PARENT_HEADER))
            .sampled("1".equals(sampled))
            // This span reuses the span ID sent by the caller, so it is flagged
            // as shared; the reporter forwards the flag to Zipkin.
            .shared(true)
            .kind(Span.Kind.SERVER)
            .timestamp(System.currentTimeMillis())
            .name(operation)
            .localEndpoint(buildEndpoint(request))
            .build();
    }

    /**
     * Writes the trace ID, span ID and sampled flag to the response headers of
     * a sampled span.
     */
    private void addTracingHeaders(HttpServletResponse response, Span span) {
        if (span.isSampled()) {
            response.setHeader(TRACE_HEADER, span.getTraceId());
            response.setHeader(SPAN_HEADER, span.getSpanId());
            response.setHeader(SAMPLED_HEADER, "1");
        }
    }

    /**
     * Records the "sr" event and the HTTP request tags on a sampled span.
     */
    private void logServerReceived(Span span, HttpServletRequest request) {
        if (span.isSampled()) {
            span.annotation(new Annotation(System.currentTimeMillis(), "sr"));

            span.tag("http.method", request.getMethod());
            span.tag("http.path", request.getRequestURI());
            span.tag("http.host", request.getServerName());

            // getHeader returns null when the client sends no User-Agent. A
            // null tag value makes zipkin2's builder throw during conversion,
            // which would drop the whole span, so the tag is skipped instead.
            String userAgent = request.getHeader("User-Agent");
            if (userAgent != null) {
                span.tag("http.user_agent", userAgent);
            }

            log.debug("服务器接收请求: {} {}, traceId: {}", 
                     request.getMethod(), request.getRequestURI(), span.getTraceId());
        }
    }

    /**
     * Records the "ss" event and the response status on a sampled span.
     */
    private void logServerSent(Span span, HttpServletResponse response) {
        if (span.isSampled()) {
            span.annotation(new Annotation(System.currentTimeMillis(), "ss"));
            span.tag("http.status_code", String.valueOf(response.getStatus()));

            log.debug("服务器发送响应: status={}, traceId={}", 
                     response.getStatus(), span.getTraceId());
        }
    }
}

🔄 三、Trace 上下文传播原理

📡 上下文传播机制

Trace 上下文跨服务传播

/**
 * Thread-bound holder of the current span and of extra key/value context,
 * with helpers to write the context to and read it from HTTP headers.
 */
@Component
@Slf4j
public class TraceContext {

    private final ThreadLocal<Span> currentSpan = new ThreadLocal<>();
    private final ThreadLocal<Map<String, String>> extraContext = new ThreadLocal<>();

    /**
     * Returns the span bound to the calling thread, or null when none is set.
     */
    public Span getCurrentSpan() {
        return currentSpan.get();
    }

    /**
     * Binds {@code span} to the calling thread.
     */
    public void setCurrentSpan(Span span) {
        currentSpan.set(span);
        if (span != null) {
            log.debug("设置当前Span: traceId={}, spanId={}", 
                     span.getTraceId(), span.getSpanId());
        }
    }

    /**
     * Returns the calling thread's extra-context map, creating an empty one on
     * first use. The cross-thread wrapper calls {@code putAll} on the result,
     * so this never returns null.
     */
    public Map<String, String> getExtraContext() {
        Map<String, String> extra = extraContext.get();
        if (extra == null) {
            extra = new java.util.HashMap<>();
            extraContext.set(extra);
        }
        return extra;
    }

    /**
     * Removes the span and the extra context from the calling thread.
     */
    public void clear() {
        currentSpan.remove();
        Map<String, String> extra = extraContext.get();
        if (extra != null) {
            extra.clear();
        }
        extraContext.remove();
    }

    /**
     * Writes the current span's B3 headers into {@code headers}. Nothing is
     * written when there is no current span or it is not sampled.
     */
    public void inject(HttpHeaders headers) {
        Span span = getCurrentSpan();
        if (span == null || !span.isSampled()) {
            return;
        }

        headers.set("X-B3-TraceId", span.getTraceId());
        headers.set("X-B3-SpanId", span.getSpanId());
        // A root span has no parent; writing a null header value would hand
        // the receiver a broken parent ID, so the header is omitted instead.
        if (span.getParentSpanId() != null) {
            headers.set("X-B3-ParentSpanId", span.getParentSpanId());
        }
        headers.set("X-B3-Sampled", "1");

        // NOTE(review): injectCustomContext is not defined in this snippet;
        // presumably it copies the extra context into headers — confirm.
        injectCustomContext(headers);
    }

    /**
     * Builds a span from the B3 headers in {@code headers}.
     *
     * @return the extracted span, or null when the trace ID or span ID header
     *         is missing
     */
    public Span extract(HttpHeaders headers) {
        String traceId = headers.getFirst("X-B3-TraceId");
        String spanId = headers.getFirst("X-B3-SpanId");
        if (traceId == null || spanId == null) {
            return null;
        }

        return Span.builder()
            .traceId(traceId)
            .spanId(spanId)
            .parentSpanId(headers.getFirst("X-B3-ParentSpanId"))
            .sampled("1".equals(headers.getFirst("X-B3-Sampled")))
            .build();
    }
}

/**
 * Propagates the trace context across threads by wrapping tasks.
 *
 * <p>A wrapped task installs the submitter's span and extra context before it
 * runs and afterwards restores whatever the executing thread held before. The
 * restore step matters when the task runs on the submitting thread itself
 * (for example under a caller-runs rejection policy): simply clearing the
 * context there would wipe the caller's own span.
 */
@Component
@Slf4j
public class TraceContextExecutor {

    private final TraceContext traceContext;

    public TraceContextExecutor(TraceContext traceContext) {
        this.traceContext = traceContext;
    }

    /**
     * Wraps a Runnable so it runs with the trace context captured at wrap time.
     */
    public Runnable wrap(Runnable task) {
        Span currentSpan = traceContext.getCurrentSpan();
        Map<String, String> currentContext = copyExtraContext();

        return () -> {
            Span previousSpan = traceContext.getCurrentSpan();
            Map<String, String> previousContext = copyExtraContext();
            try {
                install(currentSpan, currentContext);
                task.run();
            } finally {
                restore(previousSpan, previousContext);
            }
        };
    }

    /**
     * Wraps a Callable so it runs with the trace context captured at wrap time.
     */
    public <T> Callable<T> wrap(Callable<T> task) {
        Span currentSpan = traceContext.getCurrentSpan();
        Map<String, String> currentContext = copyExtraContext();

        return () -> {
            Span previousSpan = traceContext.getCurrentSpan();
            Map<String, String> previousContext = copyExtraContext();
            try {
                install(currentSpan, currentContext);
                return task.call();
            } finally {
                restore(previousSpan, previousContext);
            }
        };
    }

    /**
     * Snapshots the calling thread's extra context; returns null when empty.
     */
    private Map<String, String> copyExtraContext() {
        Map<String, String> extra = traceContext.getExtraContext();
        if (extra == null || extra.isEmpty()) {
            return null;
        }
        return new java.util.HashMap<>(extra);
    }

    /**
     * Binds the captured span and extra context to the executing thread.
     */
    private void install(Span span, Map<String, String> extra) {
        traceContext.setCurrentSpan(span);
        if (extra != null) {
            traceContext.getExtraContext().putAll(extra);
        }
    }

    /**
     * Clears the executing thread's context and re-installs what it held
     * before the task ran, if anything.
     */
    private void restore(Span span, Map<String, String> extra) {
        traceContext.clear();
        if (span != null) {
            traceContext.setCurrentSpan(span);
        }
        if (extra != null) {
            traceContext.getExtraContext().putAll(extra);
        }
    }

    /**
     * AsyncTaskExecutor decorator that wraps every task before delegating.
     *
     * <p>NOTE(review): this is a non-static inner class annotated with
     * {@code @Component}; component scanning presumably does not register it
     * — confirm how it is meant to be instantiated.
     */
    @Component
    public class TracingAsyncTaskExecutor implements AsyncTaskExecutor {

        private final AsyncTaskExecutor delegate;
        private final TraceContextExecutor contextExecutor;

        public TracingAsyncTaskExecutor(AsyncTaskExecutor delegate,
                                        TraceContextExecutor contextExecutor) {
            this.delegate = delegate;
            this.contextExecutor = contextExecutor;
        }

        // Declared by the executor contract but previously missing; without
        // it plain execute(task) calls had no traced implementation.
        @Override
        public void execute(Runnable task) {
            delegate.execute(contextExecutor.wrap(task));
        }

        @Override
        public void execute(Runnable task, long startTimeout) {
            delegate.execute(contextExecutor.wrap(task), startTimeout);
        }

        @Override
        public Future<?> submit(Runnable task) {
            return delegate.submit(contextExecutor.wrap(task));
        }

        @Override
        public <T> Future<T> submit(Callable<T> task) {
            return delegate.submit(contextExecutor.wrap(task));
        }
    }
}

🔄 Feign 客户端集成

Feign 客户端追踪集成

/**
 * Feign request interceptor that adds tracing to outgoing calls.
 *
 * <p>For a sampled current span it creates a client span, writes the B3
 * headers onto the request template and stores the client span on the
 * template; the nested logger closes that span when the response arrives.
 *
 * <p>NOTE(review): {@code tracer} and {@code traceContext} are final but no
 * constructor is visible in this snippet — confirm how they are initialised.
 */
@Component
@Slf4j
public class TracingFeignInterceptor implements RequestInterceptor {
    
    private final Tracer tracer;
    private final TraceContext traceContext;
    
    /**
     * Adds tracing headers to {@code template}; does nothing when there is no
     * current span or it is not sampled.
     */
    @Override
    public void apply(RequestTemplate template) {
        Span currentSpan = traceContext.getCurrentSpan();
        if (currentSpan != null && currentSpan.isSampled()) {
            // Create the client span as a child of the current span
            Span clientSpan = tracer.nextSpan(currentSpan)
                .name("feign:" + template.method() + " " + template.url())
                .kind(Span.Kind.CLIENT)
                .timestamp(System.currentTimeMillis());
            
            // Record the "client sent" event
            clientSpan.annotation(new Annotation(System.currentTimeMillis(), "cs"));
            
            // Inject the tracing headers; the current span becomes the parent
            template.header("X-B3-TraceId", clientSpan.getTraceId());
            template.header("X-B3-SpanId", clientSpan.getSpanId());
            template.header("X-B3-ParentSpanId", currentSpan.getSpanId());
            template.header("X-B3-Sampled", "1");
            
            // Add descriptive tags
            clientSpan.tag("http.method", template.method());
            clientSpan.tag("http.url", template.url());
            clientSpan.tag("component", "feign");
            
            // Keep the client span on the template so the logger can close it
            // NOTE(review): confirm RequestTemplate exposes attribute(...) in
            // the Feign version in use.
            template.attribute("clientSpan", clientSpan);
            
            log.debug("Feign请求追踪: {} {}, traceId: {}", 
                     template.method(), template.url(), clientSpan.getTraceId());
        }
    }
    
    /**
     * Feign logger that finishes the client span when a response is logged.
     */
    @Component
    public class TracingFeignLogger extends feign.Logger {
        
        /** Forwards Feign log lines to the debug log when it is enabled. */
        @Override
        protected void log(String configKey, String format, Object... args) {
            if (log.isDebugEnabled()) {
                log.debug(format, args);
            }
        }
        
        /** Logs the request only when debug logging is enabled. */
        @Override
        protected void logRequest(String configKey, Level logLevel, Request request) {
            // Log the request
            if (log.isDebugEnabled()) {
                super.logRequest(configKey, logLevel, request);
            }
        }
        
        /**
         * Records the "cr" event and status code on the client span stored on
         * the request template, closes it, then delegates to the superclass.
         */
        @Override
        protected Response logAndRebufferResponse(String configKey, Level logLevel, 
                                                Response response, long elapsedTime) throws IOException {
            // Look up the client span saved by apply()
            Request request = response.request();
            Span clientSpan = (Span) request.requestTemplate().attribute("clientSpan");
            
            if (clientSpan != null) {
                // Record the "client received" event
                clientSpan.annotation(new Annotation(System.currentTimeMillis(), "cr"));
                clientSpan.tag("http.status_code", String.valueOf(response.status()));
                // NOTE(review): DefaultTracer.close overwrites the duration
                // with a millisecond value, so this assignment looks to be
                // superseded by the close call below — confirm.
                clientSpan.setDuration(elapsedTime * 1000); // convert to microseconds
                
                // Close the client span
                tracer.close(clientSpan);
            }
            
            return super.logAndRebufferResponse(configKey, logLevel, response, elapsedTime);
        }
    }
}

📊 四、Zipkin 架构与数据流

🏗️ Zipkin 系统架构

Zipkin 数据流架构

应用程序 → Span数据收集 → Zipkin Collector → Storage 存储 → Zipkin Query Service → Zipkin UI

  • 可选传输通道(Span数据收集 → Zipkin Collector):消息队列 Kafka/RabbitMQ
  • Storage 存储可选后端:
      ◦ Elasticsearch
      ◦ MySQL
      ◦ Cassandra

🔧 Zipkin 集成配置

Spring Cloud Sleuth Zipkin 配置

# application.yml - Zipkin configuration
spring:
  zipkin:
    # Zipkin server address
    base-url: http://zipkin-server:9411
    # Enable reporting to Zipkin
    enabled: true
    # Service name
    service:
      name: user-service
    # Location of the Zipkin server
    location:
      # Resolve it through service discovery
      discovery:
        enabled: true
      # ...or point at it directly
      host: localhost
      port: 9411
    # Transport
    sender:
      type: web # supported: web, kafka, rabbit
    # Compression
    compression:
      enabled: true
    # Connection timeouts
    connect-timeout: 5000
    read-timeout: 10000

  sleuth:
    # Sampling rate
    sampler:
      probability: 1.0 # 1.0 means 100% of requests are sampled
    # HTTP request tracing
    web:
      client:
        enabled: true
      # Paths to skip. The value is a regular expression, so alternatives are
      # separated with "|"; a comma-separated list matches neither path.
      skip-pattern: "/health|/info"
    # Async tracing
    async:
      enabled: true
    # Messaging tracing
    messaging:
      enabled: true
    # Scheduled-task tracing
    schedule:
      enabled: true
    # Redis tracing
    redis:
      enabled: true
    # Database tracing
    jdbc:
      enabled: true

# Advanced Zipkin client configuration
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,zipkin
  endpoint:
    zipkin:
      enabled: true
  metrics:
    export:
      zipkin:
        enabled: true

# Logging (shows the trace ID and span ID from the MDC)
logging:
  pattern:
    level: "%5p [${spring.zipkin.service.name:},%X{traceId:-},%X{spanId:-}]"

📡 Zipkin 报告器实现

Zipkin Span 报告器

/**
 * Zipkin Span 报告器
 * 负责将Span数据发送到Zipkin服务器
 */
@Component
@Slf4j
public class ZipkinSpanReporter implements SpanReporter {
    
    private final ZipkinRestTemplateSender sender;
    private final ObjectMapper objectMapper;
    private final MeterRegistry meterRegistry;
    
    // 指标监控
    private final Counter spansSentCounter;
    private final Counter spansDroppedCounter;
    private final Timer sendTimer;
    
    public ZipkinSpanReporter(ZipkinRestTemplateSender sender, 
                             ObjectMapper objectMapper,
                             MeterRegistry meterRegistry) {
        this.sender = sender;
        this.objectMapper = objectMapper;
        this.meterRegistry = meterRegistry;
        
        // 初始化指标
        this.spansSentCounter = meterRegistry.counter("zipkin.spans.sent");
        this.spansDroppedCounter = meterRegistry.counter("zipkin.spans.dropped");
        this.sendTimer = meterRegistry.timer("zipkin.send.duration");
    }
    
    @Override
    public void report(Span span) {
        if (!span.isSampled()) {
            spansDroppedCounter.increment();
            return;
        }
        
        try {
            // 转换Span为Zipkin格式
            zipkin2.Span zipkinSpan = convertToZipkinSpan(span);
            
            // 发送Span到Zipkin
            sendTimer.record(() -> {
                try {
                    sender.sendSpans(Collections.singletonList(zipkinSpan));
                    spansSentCounter.increment();
                    
                    if (log.isDebugEnabled()) {
                        log.debug("Span发送成功: traceId={}, spanId={}", 
                                 span.getTraceId(), span.getSpanId());
                    }
                } catch (Exception e) {
                    log.error("Span发送失败", e);
                    spansDroppedCounter.increment();
                }
            });
            
        } catch (Exception e) {
            log.error("Span转换失败", e);
            spansDroppedCounter.increment();
        }
    }
    
    /**
     * 转换内部Span为Zipkin格式
     */
    private zipkin2.Span convertToZipkinSpan(Span span) {
        zipkin2.Span.Builder builder = zipkin2.Span.newBuilder()
            .traceId(span.getTraceId())
            .id(span.getSpanId())
            .name(span.getName())
            .timestamp(span.getTimestamp() * 1000) // 转换为微秒
            .duration(span.getDuration() * 1000);  // 转换为微秒
        
        // 设置父Span
        if (span.getParentSpanId() != null) {
            builder.parentId(span.getParentSpanId());
        }
        
        // 设置本地端点
        if (span.getLocalEndpoint() != null) {
            builder.localEndpoint(zipkin2.Endpoint.newBuilder()
                .serviceName(span.getLocalEndpoint().getServiceName())
                .ip(span.getLocalEndpoint().getIp())
                .port(span.getLocalEndpoint().getPort())
                .build());
        }
        
        // 设置远程端点
        if (span.getRemoteEndpoint() != null) {
            builder.remoteEndpoint(zipkin2.Endpoint.newBuilder()
                .serviceName(span.getRemoteEndpoint().getServiceName())
                .ip(span.getRemoteEndpoint().getIp())
                .port(span.getRemoteEndpoint().getPort())
                .build());
        }
        
        // 添加注解
        if (span.getAnnotations() != null) {
            for (Annotation annotation : span.getAnnotations()) {
                builder.addAnnotation(annotation.getTimestamp() * 1000, annotation.getValue());
            }
        }
        
        // 添加标签
        if (span.getTags() != null) {
            for (Map.Entry<String, String> tag : span.getTags().entrySet()) {
                builder.putTag(tag.getKey(), tag.getValue());
            }
        }
        
        // 设置Kind
        if (span.getKind() != null) {
            switch (span.getKind()) {
                case CLIENT:
                    builder.kind(zipkin2.Span.Kind.CLIENT);
                    break;
                case SERVER:
                    builder.kind(zipkin2.Span.Kind.SERVER);
                    break;
                case PRODUCER:
                    builder.kind(zipkin2.Span.Kind.PRODUCER);
                    break;
                case CONSUMER:
                    builder.kind(zipkin2.Span.Kind.CONSUMER);
                    break;
            }
        }
        
        // 设置共享标志
        if (span.isShared()) {
            builder.shared(true);
        }
        
        // 设置调试标志
        if (span.isDebug()) {
            builder.debug(true);
        }
        
        return builder.build();
    }
}

/**
 * Zipkin REST API 发送器
 */
@Component
@Slf4j
public class ZipkinRestTemplateSender {
    
    private final RestTemplate restTemplate;
    private final String zipkinBaseUrl;
    private final ObjectMapper objectMapper;
    
    public ZipkinRestTemplateSender(RestTemplate restTemplate,
                                  @Value("${spring.zipkin.base-url}") String zipkinBaseUrl,
                                  ObjectMapper objectMapper) {
        this.restTemplate = restTemplate;
        this.zipkinBaseUrl = zipkinBaseUrl;
        this.objectMapper = objectMapper;
    }
    
    /**
     * 批量发送Span到Zipkin
     */
    public void sendSpans(List<zipkin2.Span> spans) {
        if (spans.isEmpty()) {
            return;
        }
        
        try {
            // 序列化Span列表
            String jsonSpans = objectMapper.writeValueAsString(spans);
            
            // 构建请求
            HttpHeaders headers = new HttpHeaders();
            headers.setContentType(MediaType.APPLICATION_JSON);
            headers.set("Content-Encoding", "gzip");
            
            HttpEntity<byte[]> request = new HttpEntity<>(
                gzipCompress(jsonSpans), headers);
            
            // 发送到Zipkin
            ResponseEntity<String> response = restTemplate.postForEntity(
                zipkinBaseUrl + "/api/v2/spans", request, String.class);
            
            if (!response.getStatusCode().is2xxSuccessful()) {
                log.warn("Zipkin响应异常: {}", response.getStatusCode());
            }
            
        } catch (Exception e) {
            throw new RuntimeException("发送Span到Zipkin失败", e);
        }
    }
    
    /**
     * GZIP压缩
     */
    private byte[] gzipCompress(String data) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(data.length());
        GZIPOutputStream gzip = new GZIPOutputStream(bos);
        gzip.write(data.getBytes(StandardCharsets.UTF_8));
        gzip.close();
        return bos.toByteArray();
    }
}

⚡ 五、消息中间件集成

📨 Kafka 消息追踪

Kafka 消息追踪集成

/**
 * Kafka tracing configuration.
 *
 * <p>Exposes tracing-aware producer and consumer factories that decorate the
 * regular Spring Kafka factories.
 */
@Configuration
@Slf4j
public class KafkaTracingConfiguration {

    /** Producer factory decorated with tracing. */
    @Bean
    public TracingProducerFactory<String, String> tracingProducerFactory(
            ProducerFactory<String, String> producerFactory,
            Tracer tracer) {
        TracingProducerFactory<String, String> tracedFactory =
            new TracingProducerFactory<>(producerFactory, tracer);
        return tracedFactory;
    }

    /** Consumer factory decorated with tracing. */
    @Bean
    public TracingConsumerFactory<String, String> tracingConsumerFactory(
            ConsumerFactory<String, String> consumerFactory,
            Tracer tracer) {
        TracingConsumerFactory<String, String> tracedFactory =
            new TracingConsumerFactory<>(consumerFactory, tracer);
        return tracedFactory;
    }
}

/**
 * Producer factory that decorates every created producer with tracing.
 */
@Component
@Slf4j
public class TracingProducerFactory<K, V> implements ProducerFactory<K, V> {

    private final ProducerFactory<K, V> delegate;
    private final Tracer tracer;

    /**
     * @param delegate factory that creates the real producers
     * @param tracer   tracer used to create and close producer spans
     */
    public TracingProducerFactory(ProducerFactory<K, V> delegate, Tracer tracer) {
        this.delegate = delegate;
        this.tracer = tracer;
    }

    @Override
    public Producer<K, V> createProducer() {
        return new TracingProducer<>(delegate.createProducer(), tracer);
    }

    /**
     * Producer wrapper that writes B3 headers onto each record before sending.
     *
     * <p>NOTE(review): only the two {@code send} overloads are shown; the
     * remaining {@code Producer} methods presumably delegate — confirm.
     */
    private static class TracingProducer<K, V> implements Producer<K, V> {

        private final Producer<K, V> delegate;
        private final Tracer tracer;

        public TracingProducer(Producer<K, V> delegate, Tracer tracer) {
            this.delegate = delegate;
            this.tracer = tracer;
        }

        @Override
        public Future<RecordMetadata> send(ProducerRecord<K, V> record) {
            // Write the trace context into the record headers, then send
            return delegate.send(injectTraceContext(record));
        }

        @Override
        public Future<RecordMetadata> send(ProducerRecord<K, V> record, Callback callback) {
            return delegate.send(injectTraceContext(record), callback);
        }

        /**
         * Creates a producer span under the current span and writes its B3
         * headers onto {@code record}. The record is returned unchanged when
         * there is no current span or it is not sampled.
         */
        private ProducerRecord<K, V> injectTraceContext(ProducerRecord<K, V> record) {
            Span currentSpan = tracer.getTraceContext().getCurrentSpan();
            if (currentSpan == null || !currentSpan.isSampled()) {
                return record;
            }

            // Create the producer span
            Span producerSpan = tracer.nextSpan(currentSpan)
                .name("kafka:produce:" + record.topic())
                .kind(Span.Kind.PRODUCER)
                .timestamp(System.currentTimeMillis());

            // Record the "message sent" event and messaging tags
            producerSpan.annotation(new Annotation(System.currentTimeMillis(), "ms"));
            producerSpan.tag("messaging.system", "kafka");
            producerSpan.tag("messaging.destination", record.topic());
            producerSpan.tag("messaging.destination_kind", "topic");

            // Write the trace headers
            Headers headers = record.headers();
            putHeader(headers, "X-B3-TraceId", producerSpan.getTraceId());
            putHeader(headers, "X-B3-SpanId", producerSpan.getSpanId());
            putHeader(headers, "X-B3-ParentSpanId", currentSpan.getSpanId());
            putHeader(headers, "X-B3-Sampled", "1");

            // Custom header naming the sending service
            putHeader(headers, "X-Trace-Service", "user-service");

            log.debug("Kafka消息追踪 - 发送: topic={}, traceId={}", 
                     record.topic(), producerSpan.getTraceId());

            // The span is closed right away; the send itself is asynchronous
            tracer.close(producerSpan);

            return record;
        }

        /**
         * Sets a header to a single UTF-8 value. Any existing header with the
         * same key is removed first, so re-sending the same record does not
         * accumulate duplicate trace headers. The charset is explicit so the
         * bytes do not depend on the JVM's platform default.
         */
        private static void putHeader(Headers headers, String key, String value) {
            headers.remove(key);
            headers.add(key, value.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        }
    }
}

🆚 六、SkyWalking vs Jaeger 对比

📊 功能对比分析

三大追踪系统对比

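| 特性 | Spring Cloud Sleuth + Zipkin | SkyWalking | Jaeger |
| --- | --- | --- | --- |
| 架构模式 | 客户端-服务器 | 探针-收集器 | 客户端-收集器 |
| 数据存储 | ES, MySQL, Cassandra | ES, H2, MySQL, TiDB | Cassandra, ES, Kafka |
| UI界面 | Zipkin UI | SkyWalking UI | Jaeger UI |
| 语言支持 | Java为主,多语言支持 | 多语言探针 | 多语言客户端 |
| 性能开销 | 中等 | 低 | 中等 |
| 安装部署 | 简单 | 中等 | 简单 |
| 生态系统 | Spring Cloud生态 | Apache项目 | CNCF项目 |
| 监控维度 | 调用链、延迟 | 拓扑图、指标、追踪 | 分布式追踪 |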

🔄 迁移到 SkyWalking

SkyWalking 配置示例

# agent.config
# Each value has the form ${ENV_VAR:default}: the text after the first colon
# is the fallback used when the environment variable is not set.

# Service name
agent.service_name=${SW_AGENT_NAME:user-service}

# Backend (collector) address
collector.backend_service=${SW_AGENT_COLLECTOR:127.0.0.1:11800}

# Sampling: number of traces sampled per 3 seconds.
# The fallback here is the literal -1.
# NOTE(review): -1 presumably disables the sampling limit — confirm.
agent.sample_n_per_3_secs=${SW_AGENT_SAMPLE:-1}

# Request suffixes that are not traced
agent.ignore_suffix=${SW_AGENT_IGNORE_SUFFIX:.jpg,.jpeg,.png,.gif,.css,.js}

# Cross-process propagation
agent.cross_process_propagation_config=${SW_AGENT_CROSS_PROPAGATION:true}

💡 七、生产环境最佳实践

🔧 生产级配置

Sleuth + Zipkin 生产配置

spring:
  sleuth:
    # Sampling
    sampler:
      probability: 0.1  # 10% sampling suggested for production
    # Log correlation: MDC keys made available to the log pattern
    log:
      slf4j:
        whitelist-mdc-keys: traceId,spanId,parentSpanId
    # Async tracing
    async:
      enabled: true
      configurer:
        enabled: true
    # Scheduled-task tracing
    schedule:
      enabled: true
      skip-pattern: healthCheckTask
    # Messaging tracing
    messaging:
      enabled: true
      rabbit:
        enabled: true
      kafka:
        enabled: true

  zipkin:
    # Production Zipkin cluster
    base-url: http://zipkin-cluster:9411
    # Transport
    sender:
      type: web
    # Compression enabled
    compression:
      enabled: true
    # Connection pool settings
    rest-template:
      max-total-connections: 100
      max-per-route: 20
      connection-timeout: 5000
      read-timeout: 10000

# Log pattern: application name, trace ID and span ID
logging:
  pattern:
    level: "%5p [${spring.application.name:-},%X{traceId:-},%X{spanId:-}]"
  level:
    org.springframework.cloud.sleuth: INFO
    brave: WARN
    zipkin2: WARN

# Monitoring
management:
  endpoints:
    web:
      exposure:
        include: health,metrics,prometheus,sleuth
  endpoint:
    sleuth:
      enabled: true
  metrics:
    export:
      zipkin:
        enabled: true
        step: 1m

🚀 性能优化建议

高并发场景优化

/**
 * Tracing configuration tuned for high-concurrency workloads: asynchronous
 * and batched span reporting plus an adaptive sampler.
 */
@Configuration
@Slf4j
public class HighPerformanceTracingConfig {

    /**
     * Asynchronous span reporter, so reporting does not block business threads.
     *
     * <p>NOTE(review): both reporter beans inject a {@code SpanReporter}
     * delegate while also being {@code SpanReporter} beans themselves —
     * confirm which bean is meant to be the delegate.
     */
    @Bean
    @Primary
    public SpanReporter asyncSpanReporter(SpanReporter delegate) {
        return new AsyncSpanReporter(delegate);
    }

    /**
     * Batching span reporter, to reduce the number of network requests.
     */
    @Bean
    public SpanReporter batchingSpanReporter(SpanReporter delegate) {
        return BatchingSpanReporter.wrap(delegate)
            .maxBatchSize(100)           // maximum batch size
            .maxConcurrentBatches(5)     // maximum concurrent batches
            .batchInterval(Duration.ofSeconds(5))  // batch interval
            .build();
    }

    /**
     * Adaptive sampler: 1% base rate, 50% ceiling, 1000-request window.
     */
    @Bean
    public Sampler adaptiveSampler() {
        // AdaptiveSampler has a three-argument constructor and no builder.
        return new AdaptiveSampler(0.01, 0.5, 1000);
    }

    /**
     * Sampler that re-tunes its probability once per window of requests.
     *
     * <p>After every {@code windowSize} decisions the observed sampling rate
     * is compared with the base probability: below half of it the probability
     * is raised by 20% (capped at the maximum), above one and a half times it
     * the probability is lowered by 20% (floored at the base).
     *
     * <p>Counters are atomic and the probability is volatile. The window
     * boundary is not locked, so under concurrency the counts per window are
     * approximate.
     */
    @Slf4j
    public static class AdaptiveSampler implements Sampler {

        private final double baseProbability;
        private final double maxProbability;
        private final int windowSize;
        private final AtomicInteger requestCount = new AtomicInteger(0);
        private final AtomicInteger sampleCount = new AtomicInteger(0);
        private volatile double currentProbability;

        /**
         * @param baseProbability starting and minimum sampling probability
         * @param maxProbability  upper bound for the sampling probability
         * @param windowSize      number of decisions between adjustments
         * @throws IllegalArgumentException if {@code windowSize} is not
         *         positive or the probabilities are not ordered within [0, 1]
         */
        public AdaptiveSampler(double baseProbability, double maxProbability, int windowSize) {
            if (windowSize <= 0) {
                throw new IllegalArgumentException("windowSize must be positive: " + windowSize);
            }
            if (baseProbability < 0 || maxProbability > 1 || baseProbability > maxProbability) {
                throw new IllegalArgumentException(
                    "expected 0 <= baseProbability <= maxProbability <= 1 but got "
                        + baseProbability + " and " + maxProbability);
            }
            this.baseProbability = baseProbability;
            this.maxProbability = maxProbability;
            this.windowSize = windowSize;
            this.currentProbability = baseProbability;
        }

        @Override
        public boolean isSampled() {
            // Probabilistic decision
            boolean sampled = Math.random() < currentProbability;
            if (sampled) {
                // Without this count the observed rate is always zero and the
                // probability only ever climbs to the maximum.
                sampleCount.incrementAndGet();
            }

            // End of window: re-tune the probability
            if (requestCount.incrementAndGet() % windowSize == 0) {
                adjustSamplingRate();
            }

            return sampled;
        }

        /**
         * Compares the window's observed rate with the base probability,
         * adjusts the current probability and resets both counters.
         */
        private void adjustSamplingRate() {
            // Read and reset in one step so no sample is counted in two windows
            int sampled = sampleCount.getAndSet(0);
            requestCount.set(0);
            double actualRate = (double) sampled / windowSize;

            if (actualRate < baseProbability * 0.5) {
                currentProbability = Math.min(currentProbability * 1.2, maxProbability);
            } else if (actualRate > baseProbability * 1.5) {
                currentProbability = Math.max(currentProbability * 0.8, baseProbability);
            }

            log.info("调整采样率: {} -> {}", actualRate, currentProbability);
        }
    }
}

洞察:分布式追踪是微服务可观测性的核心支柱。合理的采样策略、高效的数据收集和智能的上下文传播,是构建生产级追踪系统的关键。理解数据流和性能影响,才能在业务需求和系统开销之间找到最佳平衡。


如果觉得本文对你有帮助,请点击 👍 点赞 + ⭐ 收藏 + 💬 留言支持!

讨论话题

  1. 你在生产环境中使用哪种分布式追踪方案?有什么经验教训?
  2. 如何设计合理的采样策略来平衡性能和可观测性?
  3. 在跨语言微服务架构中如何实现完整的链路追踪?

相关资源推荐

  • 📚 https://spring.io/projects/spring-cloud-sleuth
  • 🔧 https://zipkin.io/
  • 💻 https://github.com/example/sleuth-zipkin-demo
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

湮酒

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值