问题排查
在应用中使用druid连接池管理数据库连接,但是某一天线上定时任务不跑了,到公司后,立刻排查定时任务日志发现所有线程都在运行中,新的线程也启动不了导致后续任务都执行不了,为了分析线程都在干嘛,我们执行jstack命令将jvm中所有线程都输出到文本中。
通过分析文本发现线程都在等待数据库连接
"ExecutorAPI-jobExecute-DayBatchTask2187" #155719 prio=5 os_prio=0 tid=0x00007f89b4005800 nid=0x122bc waiting on condition [0x00007f83df0ee000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000002a10a7210> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at com.alibaba.druid.pool.DruidDataSource.takeLast(DruidDataSource.java:2218)
at com.alibaba.druid.pool.DruidDataSource.getConnectionInternal(DruidDataSource.java:1690)
at com.alibaba.druid.pool.DruidDataSource.getConnectionDirect(DruidDataSource.java:1427)
at com.alibaba.druid.filter.FilterChainImpl.dataSource_connect(FilterChainImpl.java:5059)
at com.alibaba.druid.filter.stat.StatFilter.dataSource_getConnection(StatFilter.java:726)
at com.alibaba.druid.filter.FilterChainImpl.dataSource_connect(FilterChainImpl.java:5055)
at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:1405)
at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:1397)
at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:100)
at com.baomidou.dynamic.datasource.ds.ItemDataSource.getConnection(ItemDataSource.java:56)
at com.baomidou.dynamic.datasource.ds.AbstractRoutingDataSource.getConnection(AbstractRoutingDataSource.java:48)
at org.springframework.jdbc.datasource.DataSourceUtils.fetchConnection(DataSourceUtils.java:158)
at org.springframework.jdbc.datasource.DataSourceUtils.doGetConnection(DataSourceUtils.java:116)
at org.springframework.jdbc.datasource.DataSourceUtils.getConnection(DataSourceUtils.java:79)
at org.mybatis.spring.transaction.SpringManagedTransaction.openConnection(SpringManagedTransaction.java:80)
at org.mybatis.spring.transaction.SpringManagedTransaction.getConnection(SpringManagedTransaction.java:67)
at org.apache.ibatis.executor.BaseExecutor.getConnection(BaseExecutor.java:337)
at org.apache.ibatis.executor.SimpleExecutor.prepareStatement(SimpleExecutor.java:86)
at org.apache.ibatis.executor.SimpleExecutor.doQuery(SimpleExecutor.java:62)
at org.apache.ibatis.executor.BaseExecutor.queryFromDatabase(BaseExecutor.java:325)
at org.apache.ibatis.executor.BaseExecutor.query(BaseExecutor.java:156)
at org.apache.ibatis.executor.CachingExecutor.query(CachingExecutor.java:109)
at com.baomidou.mybatisplus.extension.plugins.MybatisPlusInterceptor.intercept(MybatisPlusInterceptor.java:81)
at org.apache.ibatis.plugin.Plugin.invoke(Plugin.java:62)
at com.sun.proxy.$Proxy407.query(Unknown Source)
at sun.reflect.GeneratedMethodAccessor273.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.ibatis.plugin.Invocation.proceed(Invocation.java:49)
at com.github.yulichang.interceptor.MPJInterceptor.intercept(MPJInterceptor.java:88)
at org.apache.ibatis.plugin.Plugin.invoke(Plugin.java:62)
at com.sun.proxy.$Proxy407.query(Unknown Source)
at org.apache.ibatis.session.defaults.DefaultSqlSession.selectList(DefaultSqlSession.java:151)
at org.apache.ibatis.session.defaults.DefaultSqlSession.selectList(DefaultSqlSession.java:145)
at org.apache.ibatis.session.defaults.DefaultSqlSession.selectList(DefaultSqlSession.java:140)
at sun.reflect.GeneratedMethodAccessor272.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.mybatis.spring.SqlSessionTemplate$SqlSessionInterceptor.invoke(SqlSessionTemplate.java:427)
at com.sun.proxy.$Proxy190.selectList(Unknown Source)
at org.mybatis.spring.SqlSessionTemplate.selectList(SqlSessionTemplate.java:224)
at com.baomidou.mybatisplus.core.override.MybatisMapperMethod.executeForMany(MybatisMapperMethod.java:166)
at com.baomidou.mybatisplus.core.override.MybatisMapperMethod.execute(MybatisMapperMethod.java:77)
at com.baomidou.mybatisplus.core.override.MybatisMapperProxy$PlainMethodInvoker.invoke(MybatisMapperProxy.java:148)
at com.baomidou.mybatisplus.core.override.MybatisMapperProxy.invoke(MybatisMapperProxy.java:89)
at com.sun.proxy.$Proxy218.selectList(Unknown Source)
at com.baomidou.mybatisplus.core.mapper.BaseMapper.selectOne(BaseMapper.java:173)
at java.lang.invoke.LambdaForm$DMH/128893786.invokeSpecial_LL_L(LambdaForm$DMH)
at java.lang.invoke.LambdaForm$BMH/141746689.reinvoke(LambdaForm$BMH)
at java.lang.invoke.LambdaForm$MH/138754346.invoker(LambdaForm$MH)
at java.lang.invoke.LambdaForm$MH/802573073.invokeExact_MT(LambdaForm$MH)
at java.lang.invoke.MethodHandle.invokeWithArguments(MethodHandle.java:627)
at com.baomidou.mybatisplus.core.override.MybatisMapperProxy$DefaultMethodInvoker.invoke(MybatisMapperProxy.java:162)
at com.baomidou.mybatisplus.core.override.MybatisMapperProxy.invoke(MybatisMapperProxy.java:89)
at com.sun.proxy.$Proxy218.selectOne(Unknown Source)
at com.baomidou.mybatisplus.extension.service.impl.ServiceImpl.getOne(ServiceImpl.java:202)
at com.baomidou.mybatisplus.extension.service.IService.getOne(IService.java:320)
at com.baomidou.mybatisplus.extension.service.IService$$FastClassBySpringCGLIB$$f8525d18.invoke(<generated>)
at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:779)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750)
at org.springframework.dao.support.PersistenceExceptionTranslationInterceptor.invoke(PersistenceExceptionTranslationInterceptor.java:139)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:186)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750)
at org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:692)
at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:779)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750)
at org.springframework.aop.framework.adapter.MethodBeforeAdviceInterceptor.invoke(MethodBeforeAdviceInterceptor.java:56)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:186)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750)
at org.springframework.aop.interceptor.ExposeInvocationInterceptor.invoke(ExposeInvocationInterceptor.java:95)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:186)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.proceed(CglibAopProxy.java:750)
at org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:692)
at com.alibaba.ttl.TtlRunnable.run(TtlRunnable.java:60)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Locked ownable synchronizers:
- <0x00000004ae298958> (a java.util.concurrent.ThreadPoolExecutor$Worker)
数据库连接为什么获取不到呢,应该有很多才对,而且应用查询的都是mysql小表,用完很快就能释放,其他线程就能立刻使用,一直找不到原因。
最后猜测有可能是连接泄露了,线程用完没有释放,一直持有,当持有数量达到最大数量,线程池也不再创建了,已有的线程池也得不到释放,继而导致无连接可用。
优化策略
maxWait
获取不到连接应该要超时的,这样可以感知到失败,现在为什么没有报错或者告警呢,排查配置发现没有配置获取连接最大等待时间maxWait,默认=-1即永远等待,这样会把资源耗尽,所以需要设置最大值比如1小时。获取不到就报错+监控+告警,人工发现能够立刻可以干预,并及时恢复故障
maxWait: 1800000
removeAbandoned
当一个连接被标记为被遗弃的时候,连接池会将其从池中移除,然后释放资源,以确保连接资源能够被及时回收和重新利用,防止连接的滥用和长时间占用。
其实他并不能准确地知道连接泄露了,而是针对于所有连接的一个回收操作。
在连接执行 SQL 语句期间处于 “running” 状态时,通常不会被认定为遗弃而回收。
# 移除 长时间 没有被释放的连接
removeAbandoned: true
#打印 当遇到 长时间 没有被释放的连接
logAbandoned: true
#将超过 多长时间 没有被释放的连接 进行 移除
removeAbandonedTimeoutMillis: 600000
不可以在生产环境使用,druid源码中有这样的日志
LOG.warn("removeAbandoned is true, not use in production.");