Handling the off-heap OutOfDirectMemoryError

 ERROR reactor.core.scheduler.Schedulers                            [] - Scheduler worker in group Flink Task Threads failed with an uncaught exception
io.netty.util.internal.OutOfDirectMemoryError: failed to allocate 16777216 byte(s) of direct memory (used: 671088640, max: 673605229)
	at io.netty.util.internal.PlatformDependent.incrementMemoryCounter(PlatformDependent.java:802) ~[netty-common-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.util.internal.PlatformDependent.allocateDirectNoCleaner(PlatformDependent.java:731) ~[netty-common-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena$DirectArena.allocateDirect(PoolArena.java:648) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena$DirectArena.newChunk(PoolArena.java:623) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.allocateNormal(PoolArena.java:202) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.tcacheAllocateSmall(PoolArena.java:172) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.allocate(PoolArena.java:134) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.reallocate(PoolArena.java:286) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PooledByteBuf.capacity(PooledByteBuf.java:122) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.AbstractByteBuf.ensureWritable0(AbstractByteBuf.java:305) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.AbstractByteBuf.ensureWritable(AbstractByteBuf.java:280) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.AbstractByteBuf.writeBytes(AbstractByteBuf.java:1073) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.ByteBufOutputStream.write(ByteBufOutputStream.java:67) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at org.nustaq.serialization.util.FSTOutputStream.copyTo(FSTOutputStream.java:122) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.util.FSTOutputStream.flush(FSTOutputStream.java:146) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.coders.FSTStreamEncoder.flush(FSTStreamEncoder.java:530) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.FSTObjectOutput.flush(FSTObjectOutput.java:156) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.FSTObjectOutput.close(FSTObjectOutput.java:165) ~[fst-2.57.jar:?]
	at com.agioe.eventbus.codec.FSTMessageCodec.serialize(FSTMessageCodec.java:44) ~[blob_p-d047fb355ccbb70b608533b459bbefe0707f2b7c-5942ea43d238571bf61e191dfdc74ac9:?]
	at io.scalecube.transport.netty.TransportImpl.encodeMessage(TransportImpl.java:216) ~[scalecube-transport-netty-2.6.12.jar:?]
	at reactor.core.publisher.FluxMapFuseable$MapFuseableSubscriber.onNext(FluxMapFuseable.java:113) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Operators$ScalarSubscription.request(Operators.java:2398) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.FluxMapFuseable$MapFuseableSubscriber.request(FluxMapFuseable.java:169) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoFlatMap$FlatMapMain.onSubscribe(MonoFlatMap.java:110) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.FluxMapFuseable$MapFuseableSubscriber.onSubscribe(FluxMapFuseable.java:96) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoJust.subscribe(MonoJust.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:64) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:64) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoFlatMap$FlatMapMain.onNext(MonoFlatMap.java:157) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Operators$MonoSubscriber.complete(Operators.java:1816) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoCacheTime.subscribeOrReturn(MonoCacheTime.java:151) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:57) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:64) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDefer.subscribe(MonoDefer.java:52) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4400) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribeWith(Mono.java:4515) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4371) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4307) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4279) ~[reactor-core-3.4.17.jar:3.4.17]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.lambda$spreadGossipsTo$7(GossipProtocolImpl.java:296) ~[scalecube-cluster-2.6.12.jar:?]
	at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183) ~[?:1.8.0_345]
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193) ~[?:1.8.0_345]
	at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384) ~[?:1.8.0_345]
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482) ~[?:1.8.0_345]
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) ~[?:1.8.0_345]
	at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150) ~[?:1.8.0_345]
	at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173) ~[?:1.8.0_345]
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[?:1.8.0_345]
	at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485) ~[?:1.8.0_345]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.spreadGossipsTo(GossipProtocolImpl.java:292) ~[scalecube-cluster-2.6.12.jar:?]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.lambda$doSpreadGossip$4(GossipProtocolImpl.java:156) ~[scalecube-cluster-2.6.12.jar:?]
	at java.lang.Iterable.forEach(Iterable.java:75) ~[?:1.8.0_345]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.doSpreadGossip(GossipProtocolImpl.java:156) ~[scalecube-cluster-2.6.12.jar:?]
	at reactor.core.scheduler.PeriodicSchedulerTask.call(PeriodicSchedulerTask.java:49) [reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.scheduler.PeriodicSchedulerTask.run(PeriodicSchedulerTask.java:63) [reactor-core-3.4.17.jar:3.4.17]
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_345]
	at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) [?:1.8.0_345]
	at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) [?:1.8.0_345]
	at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) [?:1.8.0_345]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_345]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_345]
	at java.lang.Thread.run(Thread.java:750) [?:1.8.0_345]

The job hit an off-heap (direct) memory overflow. Reading the exception shows the failure happened while allocating direct memory: Netty tried to reserve another 16 MiB chunk while roughly 640 MiB of the ~642 MiB limit was already in use. The next step is to locate the code that does this accounting.

The allocation check lives in the PlatformDependent.incrementMemoryCounter method.
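
In Netty 4.1.x the method looks roughly like this (paraphrased from the Netty source, PlatformDependent.java; DIRECT_MEMORY_LIMIT is the configured maximum):

// Paraphrased from io.netty.util.internal.PlatformDependent (Netty 4.1.x).
private static void incrementMemoryCounter(int capacity) {
    if (DIRECT_MEMORY_COUNTER != null) {
        long newUsedMemory = DIRECT_MEMORY_COUNTER.addAndGet(capacity);
        if (newUsedMemory > DIRECT_MEMORY_LIMIT) {
            // Roll back the reservation and fail, producing exactly the
            // "failed to allocate ... (used: ..., max: ...)" message seen above.
            DIRECT_MEMORY_COUNTER.addAndGet(-capacity);
            throw new OutOfDirectMemoryError("failed to allocate " + capacity
                    + " byte(s) of direct memory (used: " + (newUsedMemory - capacity)
                    + ", max: " + DIRECT_MEMORY_LIMIT + ')');
        }
    }
}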

As the method shows, every allocation is first counted against DIRECT_MEMORY_COUNTER to decide whether it still fits under the limit. So we can read DIRECT_MEMORY_COUNTER via reflection and watch when and how it grows.

import io.netty.util.internal.PlatformDependent;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.springframework.util.ReflectionUtils;

import java.lang.reflect.Field;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

@Slf4j
public class DirectMemoryProcess extends ProcessFunction<String, String> {

    private static final int KB = 1024;
    private static final String BUSINESS_KEY = "netty_direct_memory";

    // Reflection handle to Netty's private static DIRECT_MEMORY_COUNTER; transient
    // because the function instance is serialized when the job is submitted.
    private transient Field field;
    private transient ScheduledExecutorService threadPool;
    private boolean started = false;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        field = ReflectionUtils.findField(PlatformDependent.class, "DIRECT_MEMORY_COUNTER");
        field.setAccessible(true);
    }

    private void doReport() {
        try {
            // The field is static, so the instance argument to get() is ignored.
            AtomicLong directMemory = (AtomicLong) field.get(null);
            log.info("{}:{}k", BUSINESS_KEY, directMemory.get() / KB);
        } catch (Exception e) {
            // Don't swallow failures silently; a broken reporter should be visible.
            log.warn("failed to read DIRECT_MEMORY_COUNTER", e);
        }
    }

    @Override
    public void processElement(String s, Context context, Collector<String> collector) throws Exception {
        // Start the reporter lazily on the first element. processElement is called
        // from a single thread per subtask, so a plain boolean guard is sufficient.
        if (!started) {
            threadPool = Executors.newScheduledThreadPool(1);
            threadPool.scheduleAtFixedRate(this::doReport, 0, 1, TimeUnit.SECONDS);
            started = true;
        }
        collector.collect(s); // pass the element through unchanged
    }

    @Override
    public void close() throws Exception {
        // Stop the reporter with the task so the thread does not leak.
        if (threadPool != null) {
            threadPool.shutdownNow();
        }
        super.close();
    }
}

The above is the Flink version; a Spring version of the same idea is sketched below.
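
The original post does not list its Spring version, so here is a minimal sketch under the assumption that Spring scheduling is enabled via @EnableScheduling; the class name DirectMemoryReporter is hypothetical:

import io.netty.util.internal.PlatformDependent;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import org.springframework.util.ReflectionUtils;

import javax.annotation.PostConstruct;
import java.lang.reflect.Field;
import java.util.concurrent.atomic.AtomicLong;

// Hypothetical Spring counterpart: the same reflection read, driven by @Scheduled.
@Slf4j
@Component
public class DirectMemoryReporter {

    private Field field;

    @PostConstruct
    public void init() {
        field = ReflectionUtils.findField(PlatformDependent.class, "DIRECT_MEMORY_COUNTER");
        field.setAccessible(true);
    }

    @Scheduled(fixedRate = 1000) // report once per second
    public void report() {
        try {
            AtomicLong directMemory = (AtomicLong) field.get(null);
            log.info("netty_direct_memory:{}k", directMemory.get() / 1024);
        } catch (Exception e) {
            log.warn("failed to read DIRECT_MEMORY_COUNTER", e);
        }
    }
}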

Watching the pattern of direct-memory growth over time makes it possible to pinpoint what is driving the growth.

As for the direct-memory overflow in Flink itself, it is mainly caused by the TaskManager's framework off-heap budget being too small; raising it in the configuration file, e.g. taskmanager.memory.framework.off-heap.size: 1g, resolves it.
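
For reference, a flink-conf.yaml sketch (the sizes are illustrative; tune them to the workload):

# flink-conf.yaml -- illustrative sizes
taskmanager.memory.framework.off-heap.size: 1g
# If the direct memory is allocated by user code rather than the framework,
# raise the task off-heap budget instead:
taskmanager.memory.task.off-heap.size: 512m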

The general idea of using reflection to track how an internal variable changes over time in order to locate a problem is a technique worth borrowing.

Reference: "Netty堆外内存泄露排查盛宴" (a Netty direct-memory leak investigation), Meituan Tech Team.
