Redis: Thread limit exceeded replacing blocked worker

Redis: Thread limit exceeded replacing blocked worker

最近系统在运行过程中出现了这样一个错误,外层是:RedisSystemException: Unknown redis exception,内层是:RejectedExecutionException: Thread limit exceeded replacing blocked worker。系统的情况是这样的:使用 .parallel() 在并行流中执行任务,任务需要根据 redis 中是否存在某个 key 来决定是查数据库还是直接组织数据。

java 复制代码
org.springframework.data.redis.RedisSystemException: Unknown redis exception
	at org.springframework.data.redis.FallbackExceptionTranslationStrategy.getFallback(FallbackExceptionTranslationStrategy.java:49) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.FallbackExceptionTranslationStrategy.translate(FallbackExceptionTranslationStrategy.java:39) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.connection.lettuce.LettuceConnection.convertLettuceAccessException(LettuceConnection.java:248) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.connection.lettuce.LettuceConnection.await(LettuceConnection.java:961) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.connection.lettuce.LettuceConnection.lambda$doInvoke$4(LettuceConnection.java:818) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.connection.lettuce.LettuceInvoker$Synchronizer.invoke(LettuceInvoker.java:665) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.connection.lettuce.LettuceInvoker.just(LettuceInvoker.java:94) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.connection.lettuce.LettuceStringCommands.get(LettuceStringCommands.java:52) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.connection.DefaultedRedisConnection.get(DefaultedRedisConnection.java:284) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.cache.DefaultRedisCacheWriter.lambda$get$1(DefaultRedisCacheWriter.java:122) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.cache.DefaultRedisCacheWriter.execute(DefaultRedisCacheWriter.java:276) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.cache.DefaultRedisCacheWriter.get(DefaultRedisCacheWriter.java:122) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.data.redis.cache.RedisCache.lookup(RedisCache.java:86) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	at org.springframework.cache.support.AbstractValueAdaptingCache.get(AbstractValueAdaptingCache.java:58) ~[spring-context-6.0.9.jar!/:6.0.9]
	at org.springframework.cache.interceptor.AbstractCacheInvoker.doGet(AbstractCacheInvoker.java:73) ~[spring-context-6.0.9.jar!/:6.0.9]
	at org.springframework.cache.interceptor.CacheAspectSupport.findInCaches(CacheAspectSupport.java:571) ~[spring-context-6.0.9.jar!/:6.0.9]
	at org.springframework.cache.interceptor.CacheAspectSupport.findCachedItem(CacheAspectSupport.java:536) ~[spring-context-6.0.9.jar!/:6.0.9]
	at org.springframework.cache.interceptor.CacheAspectSupport.execute(CacheAspectSupport.java:402) ~[spring-context-6.0.9.jar!/:6.0.9]
	at org.springframework.cache.interceptor.CacheAspectSupport.execute(CacheAspectSupport.java:345) ~[spring-context-6.0.9.jar!/:6.0.9]
	at org.springframework.cache.interceptor.CacheInterceptor.invoke(CacheInterceptor.java:64) ~[spring-context-6.0.9.jar!/:6.0.9]
	at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:184) ~[spring-aop-6.0.9.jar!/:6.0.9]
	at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750) ~[spring-aop-6.0.9.jar!/:6.0.9]
	at org.springframework.aop.interceptor.ExposeInvocationInterceptor.invoke(ExposeInvocationInterceptor.java:97) ~[spring-aop-6.0.9.jar!/:6.0.9]
	at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:184) ~[spring-aop-6.0.9.jar!/:6.0.9]
	at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750) ~[spring-aop-6.0.9.jar!/:6.0.9]
	at org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:702) ~[spring-aop-6.0.9.jar!/:6.0.9]
	at //=====================业务相关====================================
	at java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197) ~[na:na]
	at java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1625) ~[na:na]
	at java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509) ~[na:na]
	at java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499) ~[na:na]
	at java.base/java.util.stream.ReduceOps$ReduceTask.doLeaf(ReduceOps.java:960) ~[na:na]
	at java.base/java.util.stream.ReduceOps$ReduceTask.doLeaf(ReduceOps.java:934) ~[na:na]
	at java.base/java.util.stream.AbstractTask.compute(AbstractTask.java:327) ~[na:na]
	at java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:754) ~[na:na]
	at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:373) ~[na:na]
	at java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1182) ~[na:na]
	at java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1655) ~[na:na]
	at java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1622) ~[na:na]
	at java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:165) ~[na:na]
Caused by: java.util.concurrent.RejectedExecutionException: Thread limit exceeded replacing blocked worker
	at java.base/java.util.concurrent.ForkJoinPool.tryCompensate(ForkJoinPool.java:1819) ~[na:na]
	at java.base/java.util.concurrent.ForkJoinPool.compensatedBlock(ForkJoinPool.java:3446) ~[na:na]
	at java.base/java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3432) ~[na:na]
	at java.base/java.util.concurrent.CompletableFuture.timedGet(CompletableFuture.java:1939) ~[na:na]
	at java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:2095) ~[na:na]
	at io.lettuce.core.protocol.AsyncCommand.await(AsyncCommand.java:83) ~[lettuce-core-6.2.4.RELEASE.jar!/:6.2.4.RELEASE]
	at io.lettuce.core.internal.Futures.awaitOrCancel(Futures.java:244) ~[lettuce-core-6.2.4.RELEASE.jar!/:6.2.4.RELEASE]
	at io.lettuce.core.LettuceFutures.awaitOrCancel(LettuceFutures.java:74) ~[lettuce-core-6.2.4.RELEASE.jar!/:6.2.4.RELEASE]
	at org.springframework.data.redis.connection.lettuce.LettuceConnection.await(LettuceConnection.java:959) ~[spring-data-redis-3.0.6.jar!/:3.0.6]
	... 38 common frames omitted

我先说是怎么解决的:从 Java 9 开始,可以在启动服务时通过系统属性指定参数 -Djava.util.concurrent.ForkJoinPool.common.maximumSpares=1024 来避免这个问题。参数值具体修改为多少与业务有关,这个参数的作用随后解释。

ForkJoinPool是基于工作窃取(Work-Stealing)算法实现的线程池,每个线程都有自己的工作队列,用于存储待执行的任务。当一个线程执行完自己的任务之后,会从其他线程的工作队列中窃取任务执行,以此来实现任务的动态均衡和线程的利用率最大化。

一般我们都会说 ForkJoinPool 适合做 CPU 密集型的工作,而不是做 IO 密集型的工作,为什么呢?看下 JDK 17 中的源码

java 复制代码
    private int tryCompensate(long c, boolean canSaturate) { // returns UNCOMPENSATE, 0, or -1 (retry); throws RejectedExecutionException in the final branch
        Predicate<? super ForkJoinPool> sat;           // receives the saturate predicate read in the last check
        long b = bounds;                               // unpack fields
        int pc = parallelism;                          // pool's parallelism field
        int minActive = (short)(b & SMASK),            // low bits of bounds
            maxTotal  = (short)(b >>> SWIDTH) + pc,    // shifted bounds bits plus parallelism; caps expansion below
            active    = (short)(c >>> RC_SHIFT),       // active count unpacked from c
            total     = (short)(c >>> TC_SHIFT),       // total count unpacked from c
            sp        = (int)c & ~INACTIVE;            // low word of c with INACTIVE bits masked off
        if (sp != 0 && active <= pc) {                 // activate idle worker
            WorkQueue[] qs; WorkQueue v; int i;
            if (ctl == c && (qs = queues) != null &&   // proceed only if ctl still equals the snapshot c
                qs.length > (i = sp & SMASK) && (v = qs[i]) != null) {
                long nc = (v.stackPred & SP_MASK) | (UC_MASK & c);
                if (compareAndSetCtl(c, nc)) {         // CAS succeeded: unpark v's owner thread
                    v.phase = sp;
                    LockSupport.unpark(v.owner);
                    return UNCOMPENSATE;
                }
            }
            return -1;                                  // retry
        }
        else if (active > minActive && total >= pc) {   // reduce active workers
            long nc = ((RC_MASK & (c - RC_UNIT)) | (~RC_MASK & c));
            return compareAndSetCtl(c, nc) ? UNCOMPENSATE : -1;
        }
        else if (total < maxTotal && total < MAX_CAP) { // expand pool
            long nc = ((c + TC_UNIT) & TC_MASK) | (c & ~TC_MASK);
            return (!compareAndSetCtl(c, nc) ? -1 :     // CAS failed: retry
                    !createWorker() ? 0 : UNCOMPENSATE); // 0 when createWorker() returns false
        }
        else if (!compareAndSetCtl(c, c))               // validate
            return -1;
        else if (canSaturate || ((sat = saturate) != null && sat.test(this))) // saturation permitted by caller flag or by the predicate
            return 0;
        else
            throw new RejectedExecutionException(       // reached only when none of the branches above applied
                "Thread limit exceeded replacing blocked worker");
    }

注意代码段第 30 行 createWorker() 即创建新线程,第 37 行抛出我们看到的异常 Thread limit exceeded replacing blocked worker,也就是说抛出这个异常前没有满足创建新线程的条件。理所当然我们会思考这个线程数最大是多少:线程最大值为 parallelStream 所用公共池的并行度 + maximumSpares(默认 256),而公共池的并行度默认为 CPU 核数 - 1,因此对于 16 核 cpu 来讲也就是 271(256+16-1)。

这其实涉及到 ForkJoinPool 的工作机制:当一个比较复杂的判断满足时(补偿阻塞线程时),会创建新的线程,当线程数达到最大值就可能抛出这个异常。我的使用情况就是在 ForkJoinPool 中(大概几十万任务)并发调用 redis 导致线程阻塞,因此修改了 maximumSpares。那么我是怎么确定是这个问题的呢?我在项目中部署了 arthas,使用 dashboard 和 thread 指令查看了系统的运行情况(如下图所示),可以看出我的线程池中线程数已经有数百个了,并且依然处于 TIMED_WAITING 状态~

最开始看到 Unknown redis exception 以为是 redis 的连接池配置有问题,还修改了 redis 的 maxclients 和 spring.redis.timeout 参数(Spring Boot 3.x 中该属性已更名为 spring.data.redis.timeout)。这些修改也确实起了点作用:并发访问 redis 时给超时时间配置一个比较长的值也是有必要的。

其实还是自己平时经验太少了,看到 RejectedExecutionException 就应该想到这是 java 线程池的一种拒绝策略~

相关推荐
Lenyiin2 小时前
Linux 基础IO
java·linux·服务器
松☆2 小时前
Dart 核心语法精讲:从空安全到流程控制(3)
android·java·开发语言
编码者卢布2 小时前
【App Service】Java应用上传文件功能部署在App Service Windows上报错 413 Payload Too Large
java·开发语言·windows
q行3 小时前
Spring概述(含单例设计模式和工厂设计模式)
java·spring
惊讶的猫3 小时前
Redis双写一致性
数据库·redis·缓存
好好研究4 小时前
SpringBoot扩展SpringMVC
java·spring boot·spring·servlet·filter·listener
毕设源码-郭学长4 小时前
【开题答辩全过程】以 高校项目团队管理网站为例,包含答辩的问题和答案
java
玄〤4 小时前
Java 大数据量输入输出优化方案详解:从 Scanner 到手写快读(含漫画解析)
java·开发语言·笔记·算法