堆外内存OutOfDirectMemoryError处理

 ERROR reactor.core.scheduler.Schedulers                            [] - Scheduler worker in group Flink Task Threads failed with an uncaught exception
io.netty.util.internal.OutOfDirectMemoryError: failed to allocate 16777216 byte(s) of direct memory (used: 671088640, max: 673605229)
	at io.netty.util.internal.PlatformDependent.incrementMemoryCounter(PlatformDependent.java:802) ~[netty-common-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.util.internal.PlatformDependent.allocateDirectNoCleaner(PlatformDependent.java:731) ~[netty-common-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena$DirectArena.allocateDirect(PoolArena.java:648) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena$DirectArena.newChunk(PoolArena.java:623) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.allocateNormal(PoolArena.java:202) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.tcacheAllocateSmall(PoolArena.java:172) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.allocate(PoolArena.java:134) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PoolArena.reallocate(PoolArena.java:286) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.PooledByteBuf.capacity(PooledByteBuf.java:122) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.AbstractByteBuf.ensureWritable0(AbstractByteBuf.java:305) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.AbstractByteBuf.ensureWritable(AbstractByteBuf.java:280) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.AbstractByteBuf.writeBytes(AbstractByteBuf.java:1073) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at io.netty.buffer.ByteBufOutputStream.write(ByteBufOutputStream.java:67) ~[netty-buffer-4.1.74.Final.jar:4.1.74.Final]
	at org.nustaq.serialization.util.FSTOutputStream.copyTo(FSTOutputStream.java:122) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.util.FSTOutputStream.flush(FSTOutputStream.java:146) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.coders.FSTStreamEncoder.flush(FSTStreamEncoder.java:530) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.FSTObjectOutput.flush(FSTObjectOutput.java:156) ~[fst-2.57.jar:?]
	at org.nustaq.serialization.FSTObjectOutput.close(FSTObjectOutput.java:165) ~[fst-2.57.jar:?]
	at com.agioe.eventbus.codec.FSTMessageCodec.serialize(FSTMessageCodec.java:44) ~[blob_p-d047fb355ccbb70b608533b459bbefe0707f2b7c-5942ea43d238571bf61e191dfdc74ac9:?]
	at io.scalecube.transport.netty.TransportImpl.encodeMessage(TransportImpl.java:216) ~[scalecube-transport-netty-2.6.12.jar:?]
	at reactor.core.publisher.FluxMapFuseable$MapFuseableSubscriber.onNext(FluxMapFuseable.java:113) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Operators$ScalarSubscription.request(Operators.java:2398) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.FluxMapFuseable$MapFuseableSubscriber.request(FluxMapFuseable.java:169) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoFlatMap$FlatMapMain.onSubscribe(MonoFlatMap.java:110) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.FluxMapFuseable$MapFuseableSubscriber.onSubscribe(FluxMapFuseable.java:96) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoJust.subscribe(MonoJust.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:64) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:64) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoFlatMap$FlatMapMain.onNext(MonoFlatMap.java:157) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Operators$MonoSubscriber.complete(Operators.java:1816) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoCacheTime.subscribeOrReturn(MonoCacheTime.java:151) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:57) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:64) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.MonoDefer.subscribe(MonoDefer.java:52) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4400) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribeWith(Mono.java:4515) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4371) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4307) ~[reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.publisher.Mono.subscribe(Mono.java:4279) ~[reactor-core-3.4.17.jar:3.4.17]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.lambda$spreadGossipsTo$7(GossipProtocolImpl.java:296) ~[scalecube-cluster-2.6.12.jar:?]
	at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183) ~[?:1.8.0_345]
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193) ~[?:1.8.0_345]
	at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384) ~[?:1.8.0_345]
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482) ~[?:1.8.0_345]
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) ~[?:1.8.0_345]
	at java.util.stream.ForEachOps$ForEachOp.evaluateSequential(ForEachOps.java:150) ~[?:1.8.0_345]
	at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(ForEachOps.java:173) ~[?:1.8.0_345]
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[?:1.8.0_345]
	at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:485) ~[?:1.8.0_345]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.spreadGossipsTo(GossipProtocolImpl.java:292) ~[scalecube-cluster-2.6.12.jar:?]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.lambda$doSpreadGossip$4(GossipProtocolImpl.java:156) ~[scalecube-cluster-2.6.12.jar:?]
	at java.lang.Iterable.forEach(Iterable.java:75) ~[?:1.8.0_345]
	at io.scalecube.cluster.gossip.GossipProtocolImpl.doSpreadGossip(GossipProtocolImpl.java:156) ~[scalecube-cluster-2.6.12.jar:?]
	at reactor.core.scheduler.PeriodicSchedulerTask.call(PeriodicSchedulerTask.java:49) [reactor-core-3.4.17.jar:3.4.17]
	at reactor.core.scheduler.PeriodicSchedulerTask.run(PeriodicSchedulerTask.java:63) [reactor-core-3.4.17.jar:3.4.17]
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_345]
	at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) [?:1.8.0_345]
	at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) [?:1.8.0_345]
	at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) [?:1.8.0_345]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_345]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_345]
	at java.lang.Thread.run(Thread.java:750) [?:1.8.0_345]

作业运行中出现堆外内存溢出错误。从异常信息可以看出:Netty 在申请 16777216 字节(16 MB)直接内存时,已使用量(used: 671088640)加上本次申请量超过了上限(max: 673605229),因此分配失败。于是定位到分配内存的代码部分。

PlatformDependent.incrementMemoryCounter方法

可以看出,每次分配直接内存时都会先累加 DIRECT_MEMORY_COUNTER 计数器,并据此判断剩余额度是否足够分配。因此可以通过反射读取 DIRECT_MEMORY_COUNTER,并定时打印它的值,观察堆外内存在什么时候增长。

/**
 * Flink operator that periodically logs how much direct (off-heap) memory Netty has
 * reserved, so that the growth pattern can be correlated with job activity.
 *
 * <p>The value is read via reflection from the private static
 * {@code PlatformDependent.DIRECT_MEMORY_COUNTER} field, the counter that appears in
 * {@code OutOfDirectMemoryError} messages as {@code used}.
 *
 * <p>Lifecycle: the reporter thread is started in {@link #open(Configuration)} and stopped
 * in {@link #close()}, so it does not outlive the task.
 */
@Slf4j
public class DirectMemoryProcess extends ProcessFunction<String, String> {

    private static final int BYTES_PER_KB = 1024;
    private static final String BUSINESS_KEY = "netty_direct_memory";
    private static final long REPORT_PERIOD_SECONDS = 1L;

    // Runtime-only state: populated in open(), so it is excluded from function serialization.
    private transient AtomicLong directMemory;
    transient Field field;
    private transient ScheduledExecutorService reporter;

    /**
     * Resolves Netty's direct-memory counter and starts the periodic reporter.
     *
     * @param parameters the configuration passed through to {@code super.open}
     * @throws IllegalStateException if {@code DIRECT_MEMORY_COUNTER} cannot be found on
     *     {@code PlatformDependent}
     * @throws Exception if the superclass fails to open or the field cannot be read
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);

        field = ReflectionUtils.findField(PlatformDependent.class, "DIRECT_MEMORY_COUNTER");
        if (field == null) {
            throw new IllegalStateException(
                    "Field DIRECT_MEMORY_COUNTER not found on " + PlatformDependent.class.getName());
        }
        field.setAccessible(true);

        // The field is static, so the instance argument is ignored; it is read once here
        // instead of on every tick.
        directMemory = (AtomicLong) field.get(null);
        if (directMemory == null) {
            log.warn("{}: DIRECT_MEMORY_COUNTER is null, nothing to report", BUSINESS_KEY);
            return;
        }

        // Daemon thread so a missed close() can never keep the JVM alive.
        reporter = Executors.newSingleThreadScheduledExecutor(task -> {
            Thread thread = new Thread(task, "netty-direct-memory-reporter");
            thread.setDaemon(true);
            return thread;
        });
        reporter.scheduleAtFixedRate(this::doReport, 0, REPORT_PERIOD_SECONDS, TimeUnit.SECONDS);
    }

    /**
     * Stops the reporter thread started by {@link #open(Configuration)}.
     *
     * @throws Exception if the superclass fails to close
     */
    @Override
    public void close() throws Exception {
        try {
            if (reporter != null) {
                reporter.shutdownNow();
                reporter = null;
            }
        } finally {
            super.close();
        }
    }

    /** Logs the current counter value in kilobytes. */
    private void doReport() {
        try {
            long memoryInKb = directMemory.get() / BYTES_PER_KB;
            log.info("{}:{}k", BUSINESS_KEY, memoryInKb);
        } catch (RuntimeException e) {
            // An exception escaping a scheduleAtFixedRate task cancels all later runs,
            // so log it here and keep the reporter alive.
            log.warn("Failed to report {}", BUSINESS_KEY, e);
        }
    }

    /**
     * Intentionally does nothing with the element: reporting is driven by the timer started
     * in {@link #open(Configuration)}, not by the data flow.
     *
     * <p>NOTE(review): elements are not forwarded to {@code collector}, so nothing reaches
     * downstream operators. Confirm this is intended for the job this is plugged into.
     *
     * @param s the incoming element (unused)
     * @param context the process-function context (unused)
     * @param collector the downstream collector (unused)
     */
    @Override
    public void processElement(String s, Context context, Collector<String> collector) throws Exception {
        // No per-element work.
    }
}

flink版和spring版

通过观察堆外内存的增长规律,即可定位内存增长的原因。

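而 Flink 中的堆外内存溢出,主要是因为 TaskManager 可用的堆外内存空间不足。在配置文件中调大 taskmanager.memory.framework.off-heap.size(例如设置为 1g)即可解决:

taskmanager.memory.framework.off-heap.size: 1g

(如果堆外内存主要由用户代码及其依赖库占用,也可以相应调大 taskmanager.memory.task.off-heap.size。)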

通过反射读取某个内部变量、并观察其变化来定位问题的思路,值得借鉴。

参考地址:Netty堆外内存泄露排查盛宴 - 美团技术团队

相关推荐
武子康24 分钟前
大数据-258 离线数仓 - Griffin架构 配置安装 Livy 架构设计 解压配置 Hadoop Hive
java·大数据·数据仓库·hive·hadoop·架构
豪宇刘1 小时前
MyBatis的面试题以及详细解答二
java·servlet·tomcat
秋恬意1 小时前
Mybatis能执行一对一、一对多的关联查询吗?都有哪些实现方式,以及它们之间的区别
java·数据库·mybatis
FF在路上2 小时前
Knife4j调试实体类传参扁平化模式修改:default-flat-param-object: true
java·开发语言
真的很上进2 小时前
如何借助 Babel+TS+ESLint 构建现代 JS 工程环境?
java·前端·javascript·css·react.js·vue·html
众拾达人3 小时前
Android自动化测试实战 Java篇 主流工具 框架 脚本
android·java·开发语言
皓木.3 小时前
Mybatis-Plus
java·开发语言
不良人天码星3 小时前
lombok插件不生效
java·开发语言·intellij-idea
守护者1703 小时前
JAVA学习-练习试用Java实现“使用Arrays.toString方法将数组转换为字符串并打印出来”
java·学习
源码哥_博纳软云3 小时前
JAVA同城服务场馆门店预约系统支持H5小程序APP源码
java·开发语言·微信小程序·小程序·微信公众平台