背景
获取channel超时时,业务代码因超时没有拿到channel就直接返回了,但netty中仍有一个线程在继续获取channel;这个channel被取出后,由于业务代码已经返回,没有人去释放它,导致连接池里可用的channel数量下降!
更多问题情况参考这篇文章:https://cloud.tencent.com/developer/article/1704886
获取连接的任务超时后,仍有一个异步线程在执行从连接池获取连接的操作。这个连接取出后无法再交还给业务线程,因为业务线程此时已经因获取连接超时而抛出异常;而正常情况下,释放连接的操作又是由业务线程触发的,所以这个连接永远不会被释放。当这类超时的获取任务把连接池中所有可用连接都取走之后,服务就不可用了。
问题解决
为等待获取连接的任务配置超时处理策略(AcquireTimeoutAction),并设置acquireTimeoutMillis参数。但文章中给出的解决方案不适用于本系统,因此略作修改后采用。
观察FixedChannelPool源码:
/**
 * Creates a pool limited to {@code maxConnections} connections.
 *
 * <p>Delegates to the four-argument constructor, passing {@code Integer.MAX_VALUE}
 * as {@code maxPendingAcquires}.
 *
 * @param bootstrap      forwarded unchanged to the delegate constructor
 * @param handler        forwarded unchanged to the delegate constructor
 * @param maxConnections connection limit; the full constructor rejects values below 1
 */
public FixedChannelPool(Bootstrap bootstrap, ChannelPoolHandler handler, int maxConnections) {
this(bootstrap, handler, maxConnections, Integer.MAX_VALUE);
}
/**
 * Creates a pool with a connection limit and a pending-acquire limit.
 *
 * <p>Delegates to the seven-argument constructor with these defaults:
 * {@code ChannelHealthChecker.ACTIVE} as the health check, a {@code null}
 * {@code AcquireTimeoutAction}, and {@code -1L} as {@code acquireTimeoutMillis}.
 * With a null action and a timeout of -1 the full constructor sets no timeout task.
 *
 * @param bootstrap          forwarded unchanged to the delegate constructor
 * @param handler            forwarded unchanged to the delegate constructor
 * @param maxConnections     connection limit; the full constructor rejects values below 1
 * @param maxPendingAcquires pending-acquire limit; the full constructor rejects values below 1
 */
public FixedChannelPool(Bootstrap bootstrap, ChannelPoolHandler handler, int maxConnections, int maxPendingAcquires) {
this(bootstrap, handler, ChannelHealthChecker.ACTIVE, (AcquireTimeoutAction)null, -1L, maxConnections, maxPendingAcquires);
}
/**
 * Creates a pool with an explicit health check and acquire-timeout configuration.
 *
 * <p>Delegates to the eight-argument constructor with {@code releaseHealthCheck}
 * set to {@code true}.
 *
 * @param bootstrap            forwarded unchanged to the delegate constructor
 * @param handler              forwarded unchanged to the delegate constructor
 * @param healthCheck          forwarded unchanged to the delegate constructor
 * @param action               what to do when an acquire times out; may be {@code null}
 *                             only together with {@code acquireTimeoutMillis == -1}
 * @param acquireTimeoutMillis acquire timeout in milliseconds, or {@code -1} for none
 * @param maxConnections       connection limit; the full constructor rejects values below 1
 * @param maxPendingAcquires   pending-acquire limit; the full constructor rejects values below 1
 */
public FixedChannelPool(Bootstrap bootstrap, ChannelPoolHandler handler, ChannelHealthChecker healthCheck, AcquireTimeoutAction action, long acquireTimeoutMillis, int maxConnections, int maxPendingAcquires) {
this(bootstrap, handler, healthCheck, action, acquireTimeoutMillis, maxConnections, maxPendingAcquires, true);
}
/**
 * Creates a pool with an explicit {@code releaseHealthCheck} flag.
 *
 * <p>Delegates to the nine-argument constructor with {@code lastRecentUsed}
 * set to {@code true}.
 *
 * @param bootstrap            forwarded unchanged to the delegate constructor
 * @param handler              forwarded unchanged to the delegate constructor
 * @param healthCheck          forwarded unchanged to the delegate constructor
 * @param action               what to do when an acquire times out; may be {@code null}
 *                             only together with {@code acquireTimeoutMillis == -1}
 * @param acquireTimeoutMillis acquire timeout in milliseconds, or {@code -1} for none
 * @param maxConnections       connection limit; the full constructor rejects values below 1
 * @param maxPendingAcquires   pending-acquire limit; the full constructor rejects values below 1
 * @param releaseHealthCheck   forwarded unchanged to the delegate constructor
 */
public FixedChannelPool(Bootstrap bootstrap, ChannelPoolHandler handler, ChannelHealthChecker healthCheck, AcquireTimeoutAction action, long acquireTimeoutMillis, int maxConnections, int maxPendingAcquires, boolean releaseHealthCheck) {
this(bootstrap, handler, healthCheck, action, acquireTimeoutMillis, maxConnections, maxPendingAcquires, releaseHealthCheck, true);
}
/**
 * Full constructor that every shorter overload ends up calling.
 *
 * <p>It validates the two limits, then configures the acquire-timeout handling from
 * {@code action} and {@code acquireTimeoutMillis}:
 * <ul>
 *   <li>{@code action == null} and {@code acquireTimeoutMillis == -1}: no timeout task.</li>
 *   <li>{@code FAIL}: on timeout the pending acquire's promise is failed with a
 *       {@code TimeoutException}.</li>
 *   <li>{@code NEW}: on timeout the task is marked acquired and the acquire is handed
 *       to the superclass implementation.</li>
 * </ul>
 *
 * @param bootstrap            passed to the superclass; also supplies the executor
 * @param handler              passed to the superclass
 * @param healthCheck          passed to the superclass
 * @param action               timeout strategy, or {@code null} for none
 * @param acquireTimeoutMillis acquire timeout in milliseconds, or {@code -1} for none
 * @param maxConnections       connection limit, must be at least 1
 * @param maxPendingAcquires   pending-acquire limit, must be at least 1
 * @param releaseHealthCheck   passed to the superclass
 * @param lastRecentUsed       passed to the superclass
 * @throws IllegalArgumentException if {@code maxConnections < 1}, if
 *         {@code maxPendingAcquires < 1}, or if {@code action} is non-null and
 *         {@code acquireTimeoutMillis} is negative
 * @throws NullPointerException if {@code action} is null while
 *         {@code acquireTimeoutMillis} is not {@code -1}
 */
public FixedChannelPool(Bootstrap bootstrap, ChannelPoolHandler handler, ChannelHealthChecker healthCheck, AcquireTimeoutAction action, long acquireTimeoutMillis, int maxConnections, int maxPendingAcquires, boolean releaseHealthCheck, boolean lastRecentUsed) {
super(bootstrap, handler, healthCheck, releaseHealthCheck, lastRecentUsed);
this.pendingAcquireQueue = new ArrayDeque();
this.acquiredChannelCount = new AtomicInteger();
// Reject limits below 1 before anything else is configured.
if (maxConnections < 1) {
throw new IllegalArgumentException("maxConnections: " + maxConnections + " (expected: >= 1)");
} else if (maxPendingAcquires < 1) {
throw new IllegalArgumentException("maxPendingAcquires: " + maxPendingAcquires + " (expected: >= 1)");
} else {
// No action and no timeout: acquire timeouts are disabled.
// This is the configuration the three- and four-argument constructors produce.
if (action == null && acquireTimeoutMillis == -1L) {
this.timeoutTask = null;
this.acquireTimeoutNanos = -1L;
} else {
// A timeout without an action is rejected.
if (action == null && acquireTimeoutMillis != -1L) {
throw new NullPointerException("action");
}
// An action with a negative timeout is rejected.
if (action != null && acquireTimeoutMillis < 0L) {
throw new IllegalArgumentException("acquireTimeoutMillis: " + acquireTimeoutMillis + " (expected: >= 0)");
}
// The timeout is stored in nanoseconds.
this.acquireTimeoutNanos = TimeUnit.MILLISECONDS.toNanos(acquireTimeoutMillis);
switch (action) {
case FAIL:
// FAIL: complete the pending acquire's promise with a TimeoutException.
this.timeoutTask = new TimeoutTask() {
public void onTimeout(AcquireTask task) {
task.promise.setFailure(new TimeoutException("Acquire operation took longer then configured maximum time") {
// Returns this without calling super, so no stack trace is filled in.
public Throwable fillInStackTrace() {
return this;
}
});
}
};
break;
case NEW:
// NEW: mark the task as acquired, then call the superclass acquire directly
// rather than this class's own acquire.
// NOTE(review): presumably this bypasses the connection limit — confirm against the Netty docs.
this.timeoutTask = new TimeoutTask() {
public void onTimeout(AcquireTask task) {
task.acquired();
FixedChannelPool.super.acquire(task.promise);
}
};
break;
default:
throw new Error();
}
}
// Take one executor from the bootstrap's event loop group for this pool.
this.executor = bootstrap.config().group().next();
this.maxConnections = maxConnections;
this.maxPendingAcquires = maxPendingAcquires;
}
}
可以看到,如果我们选择参数较少的构造方法,它会层层调用参数更多的重载,并自动补全缺省参数,最终都会走到全参构造方法。因此我们可以仿照这种写法,直接调用全参构造方法并传入自己的参数,主要是修改acquireTimeoutAction和acquireTimeoutMillis。
可以将acquireTimeoutAction设置为FAIL。这样获取超时的时候,等待中的获取任务会被直接置为失败(promise.setFailure),业务线程仍然可以拿到异常,而且不会再有连接被取出后无人释放,不会浪费资源。
测试代码
运行如下代码可以发现:修复前,main函数的try代码块中获取channel超时后,即使线程1和线程2都释放了channel,线程3和线程4也不能成功获取到channel;
而修复后,try代码块依然会抛出超时异常,但线程3和线程4可以正常获取到线程1和线程2释放的channel。
/**
 * Client that keeps a fixed-size pool of long-lived Netty channels to one remote address.
 *
 * <p>The pool is built with {@code AcquireTimeoutAction.FAIL} and an acquire timeout, so an
 * acquire that waits too long fails inside the pool instead of later handing out a channel
 * that no caller is waiting for (which would leak the channel).
 *
 * <p>Call {@link #close()} when the client is no longer needed; otherwise the pooled channels
 * and the event loop threads are never released.
 */
public class LongConnectionPoolClient implements AutoCloseable {
    /** Connect timeout in milliseconds. */
    public static final int CONNECT_TIMEOUT = 5000;

    private final String host;
    private final int port;
    /** Pool capacity (maximum number of connections). */
    private final int poolSize;

    private EventLoopGroup mGroup;
    private Bootstrap mBootstrap;
    InetSocketAddress addr;
    ChannelPool pool;
    /** On acquire timeout, fail the pending acquire instead of leaving it queued. */
    FixedChannelPool.AcquireTimeoutAction acquireTimeoutAction = FixedChannelPool.AcquireTimeoutAction.FAIL;
    /** Acquire timeout: 5 seconds, expressed in milliseconds. */
    long acquireTimeoutMillis = 5000;
    int maxPendingAcquires = 100000;

    /**
     * Creates the client and immediately builds the underlying channel pool.
     *
     * @param host     remote host to connect to
     * @param port     remote port to connect to
     * @param poolSize maximum number of pooled connections
     */
    public LongConnectionPoolClient(String host, int port, int poolSize) {
        this.host = host;
        this.port = port;
        this.poolSize = poolSize;
        build();
    }

    /** Creates the event loop group, the bootstrap and the fixed channel pool. */
    private void build() {
        mGroup = new NioEventLoopGroup();
        mBootstrap = new Bootstrap();
        addr = new InetSocketAddress(host, port);
        mBootstrap.option(ChannelOption.CONNECT_TIMEOUT_MILLIS, CONNECT_TIMEOUT)
                .group(mGroup)
                .remoteAddress(addr)
                .channel(NioSocketChannel.class); // NioSocketChannel is the channel type used for connecting

        // The three-argument FixedChannelPool constructor configures no acquire timeout,
        // so the full constructor is used to pass the timeout action and timeout explicitly.
        pool = new FixedChannelPool(mBootstrap, new NettyChannelPoolHandler(), ChannelHealthChecker.ACTIVE,
                acquireTimeoutAction, acquireTimeoutMillis, poolSize, maxPendingAcquires, true, true);
    }

    /**
     * Acquires a channel from the pool, blocking until one is available or the acquire fails.
     *
     * <p>No timeout is passed to {@code get()} on purpose: the wait is bounded by the pool's
     * own acquire timeout, which fails the future when it expires.
     *
     * @return the acquired channel; the caller must hand it back via {@link #release(Channel)}
     * @throws Exception if the acquire fails, is interrupted, or times out inside the pool
     */
    public Channel acquire() throws Exception {
        Future<Channel> pending = pool.acquire();
        return pending.get();
    }

    /**
     * Returns a channel to the pool. A {@code null} channel is ignored.
     *
     * <p>Releasing is asynchronous, so a failure is reported through the returned future and
     * not by an exception thrown here; a listener prints such failures.
     *
     * @param channel the channel to release, may be {@code null}
     */
    public void release(Channel channel) {
        if (channel == null) {
            return;
        }
        try {
            pool.release(channel).addListener(future -> {
                if (!future.isSuccess()) {
                    future.cause().printStackTrace();
                }
            });
        } catch (Exception e) {
            // Best effort, as before: a failed release must not crash the caller.
            e.printStackTrace();
        }
    }

    /**
     * Closes the channel pool and then shuts down the event loop group.
     *
     * <p>The pool is closed first because it still needs the event loop to close its channels.
     */
    @Override
    public void close() {
        try {
            pool.close();
        } finally {
            mGroup.shutdownGracefully();
        }
    }
}
/**
 * Test thread that acquires one channel from the shared client, holds it for ten seconds
 * and then releases it, printing what it acquired and released.
 */
public class TestRun extends Thread {
    private final LongConnectionPoolClient client;
    private final String name;

    /**
     * @param client the shared pool client to acquire from
     * @param name   label printed in front of this thread's output
     */
    public TestRun(LongConnectionPoolClient client, String name) {
        this.client = client;
        this.name = name;
    }

    /**
     * Acquires a channel, sleeps ten seconds while holding it, then releases it.
     *
     * @throws RuntimeException wrapping the cause if the acquire fails or the sleep is interrupted
     */
    @Override
    public void run() {
        Channel channel;
        try {
            channel = client.acquire();
            System.out.println(name + "获取:" + channel.id());
        } catch (Exception e) {
            System.out.println("thread n timeout");
            throw new RuntimeException(e);
        }
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still see it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } finally {
            // Release in finally: an interrupted sleep must not leak the channel.
            client.release(channel);
            System.out.println(name + "释放:" + channel.id());
        }
    }
}
/**
 * Demo entry point that reproduces the acquire-timeout scenario against a pool of size 2.
 *
 * <p>Threads 1 and 2 take both channels, the main thread then tries to acquire a third and is
 * expected to time out, and after the first two threads have released their channels threads
 * 3 and 4 try to acquire them.
 */
public class PoolStart {
    public static void main(String[] args) throws Exception {
        // Only one client may be declared. The author's alternative target was
        // "10.26.14.6", 9290 — swap the arguments to test against that server instead.
        LongConnectionPoolClient client = new LongConnectionPoolClient("127.0.0.1", 9120, 2);

        TestRun testRun1 = new TestRun(client, "thread 1");
        testRun1.start();
        TestRun testRun2 = new TestRun(client, "thread 2");
        testRun2.start();

        // Give threads 1 and 2 time to take both channels.
        Thread.sleep(1000);
        try {
            // The pool is exhausted, so this acquire is expected to time out.
            Channel ch1 = client.acquire();
            // If it succeeds after all, hand the channel back so it is not leaked.
            client.release(ch1);
        } catch (Exception e) {
            System.out.println("main timeout" + e);
        }

        // Wait until threads 1 and 2 have finished their 10-second hold and released.
        Thread.sleep(15000);
        TestRun testRun3 = new TestRun(client, "thread 3");
        testRun3.start();
        TestRun testRun4 = new TestRun(client, "thread 4");
        testRun4.start();
    }
}