Handling the Off-Heap OutOfDirectMemoryError

 ERROR reactor.core.scheduler.Schedulers                            [] - Scheduler worker in group Flink Task Threads failed with an uncaught exception
io.netty.util.internal.OutOfDirectMemoryError: failed to allocate 16777216 byte(s) of direct memory (used: 671088640, max: 673605229)
	at io.netty.util.internal.PlatformDependent.incrementMemoryCounter(PlatformDependent.java:802) ~[netty-common-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.util.internal.PlatformDependent.allocateDirectNoCleaner(PlatformDependent.java:731) ~[netty-common-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena$DirectArena.allocateDirect(PoolArena.java:648) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena$DirectArena.newChunk(PoolArena.java:623) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.allocateNormal(PoolArena.java:202) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.tcacheAllocateSmall(PoolArena.java:172) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.allocate(PoolArena.java:134) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.reallocate(PoolArena.java:286) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PooledByteBuf.capacity(PooledByteBuf.java:122) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.AbstractByteBuf.ensureWritable0(AbstractByteBuf.java:305) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.AbstractByteBuf.ensureWritable(AbstractByteBuf.java:280) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.AbstractByteBuf.writeBytes(AbstractByteBuf.java:1073) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.ByteBufOutputStream.write(ByteBufOutputStream.java:67) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at org.nustaq.serialization.util.FSTOutputStream.copyTo(FSTOutputStream.java:122) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.util.FSTOutputStream.flush(FSTOutputStream.java:146) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.coders.FSTStreamEncoder.flush(FSTStreamEncoder.java:530) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.FSTObjectOutput.flush(FSTObjectOutput.java:156) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.FSTObjectOutput.close(FSTObjectOutput.java:165) ~[fst-2.57.jar:?]
	at com.agioe.eventbus.codec.FSTMessageCodec.serialize(FSTMessageCodec.java:44) ~[blob_p-d047fb355ccbb70b608533b459bbefe0707f2b7c-5942ea43d238571bf61e191dfdc74ac9:?]
	at io.scalecube.transport.netty.TransportImpl.encodeMessage(TransportImpl.java:216) ~[scalecube-transport-netty-2.6.12.jar:?]
	at reactor.core.publisher.FluxMapFuseable$MapFuseableSubscriber.onNext(FluxMapFuseable.java:113) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Operators$ScalarSubscription.request(Operators.java:2398) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.FluxMapFuseable$MapFuseableSubscriber.request(FluxMapFuseable.java:169) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoFlatMap$FlatMapMain.onSubscribe(MonoFlatMap.java:110) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.FluxMapFuseable$MapFuseableSubscriber.onSubscribe(FluxMapFuseable.java:96) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoJust.subscribe(MonoJust.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:64) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:64) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoFlatMap$FlatMapMain.onNext(MonoFlatMap.java:157) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Operators$MonoSubscriber.complete(Operators.java:1816) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoCacheTime.subscribeOrReturn(MonoCacheTime.java:151) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:57) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:64) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDefer.subscribe(MonoDefer.java:52) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4400) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribeWith(Mono.java:4515) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4371) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4307) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4279) ~[reactor-core-3.4.17.jar:3.4.17]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.lambda$spreadGossipsTo$7(GossipProtocolImpl.java:296) ~[scalecube-cluster-2.6.12.jar:?]
	at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183) ~[?:1.8.0_345]
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193) ~[?:1.8.0_345]
	at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384) ~[?:1.8.0_345]
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482) ~[?:1.8.0_345]
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) ~[?:1.8.0_345]
	at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150) ~[?:1.8.0_345]
	at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173) ~[?:1.8.0_345]
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[?:1.8.0_345]
	at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485) ~[?:1.8.0_345]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.spreadGossipsTo(GossipProtocolImpl.java:292) ~[scalecube-cluster-2.6.12.jar:?]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.lambda$doSpreadGossip$4(GossipProtocolImpl.java:156) ~[scalecube-cluster-2.6.12.jar:?]
	at java.lang.Iterable.forEach(Iterable.java:75) ~[?:1.8.0_345]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.doSpreadGossip(GossipProtocolImpl.java:156) ~[scalecube-cluster-2.6.12.jar:?]
	at reactor.core.scheduler.PeriodicSchedulerTask.call(PeriodicSchedulerTask.java:49) [reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.scheduler.PeriodicSchedulerTask.run(PeriodicSchedulerTask.java:63) [reactor-core-3.4.17.jar:3.4.17]
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_345]
	at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) [?:1.8.0_345]
	at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) [?:1.8.0_345]
	at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) [?:1.8.0_345]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_345]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_345]
	at java.lang.Thread.run(Thread.java:750) [?:1.8.0_345]

An off-heap (direct memory) overflow occurred. Reading the exception shows that an allocation failed because the direct memory in use had effectively reached the limit (used: 671088640, max: 673605229). The next step is to locate the code that accounts for these allocations.

The PlatformDependent.incrementMemoryCounter method:
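The original post shows the method body here as an image; the following is a simplified paraphrase of the Netty 4.1.x source (field names and message details may differ slightly from the exact release), which matches the error message seen in the log above.

// Simplified from io.netty.util.internal.PlatformDependent (Netty 4.1.x); paraphrased, not verbatim.
private static void incrementMemoryCounter(int capacity) {
    if (DIRECT_MEMORY_COUNTER != null) {
        long newUsedMemory = DIRECT_MEMORY_COUNTER.addAndGet(capacity);
        if (newUsedMemory > DIRECT_MEMORY_LIMIT) {
            // Roll back the reservation and fail with the message seen in the log above.
            DIRECT_MEMORY_COUNTER.addAndGet(-capacity);
            throw new OutOfDirectMemoryError("failed to allocate " + capacity
                    + " byte(s) of direct memory (used: " + newUsedMemory
                    + ", max: " + DIRECT_MEMORY_LIMIT + ')');
        }
    }
}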

The code shows that every allocation is counted against DIRECT_MEMORY_COUNTER, and that counter decides whether the allocation still fits under the limit. We can therefore read DIRECT_MEMORY_COUNTER via reflection and watch when and how it grows.

import io.netty.util.internal.PlatformDependent;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.springframework.util.ReflectionUtils;

import java.lang.reflect.Field;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

@Slf4j
public class DirectMemoryProcess extends ProcessFunction<String, String> {

    private static final int _1K = 1024;
    private static final String BUSINESS_KEY = "netty_direct_memory";

    // Netty's private static AtomicLong that tracks allocated direct memory.
    private transient Field field;
    // Scheduler that periodically logs the counter; started once per subtask.
    private transient ScheduledExecutorService threadPool;
    private boolean started = false;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // Spring's ReflectionUtils is used here only as a convenience to look up the field.
        field = ReflectionUtils.findField(PlatformDependent.class, "DIRECT_MEMORY_COUNTER");
        field.setAccessible(true);
    }

    private void doReport() {
        try {
            // The field is static, so the instance argument of get() is ignored.
            AtomicLong directMemory = (AtomicLong) field.get(null);
            long memoryInKb = directMemory.get() / _1K;
            log.info("{}:{}k", BUSINESS_KEY, memoryInKb);
        } catch (Exception e) {
            log.warn("failed to read DIRECT_MEMORY_COUNTER", e);
        }
    }

    @Override
    public void processElement(String s, Context context, Collector<String> collector) throws Exception {
        // Start the reporting thread lazily, on the first element processed by this subtask.
        if (!started) {
            threadPool = Executors.newScheduledThreadPool(1);
            threadPool.scheduleAtFixedRate(this::doReport, 0, 1, TimeUnit.SECONDS);
            started = true;
        }
    }

    @Override
    public void close() throws Exception {
        if (threadPool != null) {
            threadPool.shutdownNow();
        }
        super.close();
    }
}
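For completeness, a hedged usage sketch (not from the original post): the function is attached to any convenient String stream only so that each subtask starts the reporting thread; the socket source and job name below are placeholders.

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class DirectMemoryMonitorJob {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Any String stream works; the reporter ignores the elements themselves.
        env.socketTextStream("localhost", 9999)
                .process(new DirectMemoryProcess())
                .print();
        env.execute("direct-memory-monitor");
    }
}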

The above is the Flink version; a Spring application can do the same thing, as sketched below.
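The Spring version did not survive extraction; this is a minimal sketch under the assumption of a scheduled bean (the class name DirectMemoryReporter and the one-second rate are illustrative), using Spring's @Scheduled instead of a hand-rolled executor.

import io.netty.util.internal.PlatformDependent;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import org.springframework.util.ReflectionUtils;

import javax.annotation.PostConstruct;
import java.lang.reflect.Field;
import java.util.concurrent.atomic.AtomicLong;

// Requires @EnableScheduling on a configuration class.
@Slf4j
@Component
public class DirectMemoryReporter {

    private static final int _1K = 1024;
    private static final String BUSINESS_KEY = "netty_direct_memory";

    private Field field;

    @PostConstruct
    public void init() {
        field = ReflectionUtils.findField(PlatformDependent.class, "DIRECT_MEMORY_COUNTER");
        field.setAccessible(true);
    }

    @Scheduled(fixedRate = 1000)
    public void doReport() throws IllegalAccessException {
        AtomicLong directMemory = (AtomicLong) field.get(null);
        log.info("{}:{}k", BUSINESS_KEY, directMemory.get() / _1K);
    }
}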

By watching how the off-heap counter grows over time, you can correlate the growth with specific workloads and locate the cause of the increase.

In this case, the off-heap overflow in Flink was mainly due to the TaskManager's off-heap allocation being too small. Increasing it in the Flink configuration file (flink-conf.yaml), e.g. taskmanager.memory.framework.off-heap.size: 1g, resolves the problem.

The general idea of reading a variable via reflection and observing how it changes over time in order to pin down a problem is worth borrowing in other situations.

Reference: "Netty堆外内存泄露排查盛宴" by the Meituan technical team (a post on troubleshooting Netty off-heap memory leaks).
