1、启动参数配置
ps -ef | grep tomcat
admin 22305 1 32 May13 ? 3-12:02:40 /opt/edas/jdk/java/bin/java -Djava.util.logging.config.file=/home/admin/taobao-tomcat-production-7.0.59.3/conf/logging.properties
-Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager
-Djdk.tls.ephemeralDHKeySize=2048
-Djava.security.egd=file:/dev/./urandom
-Dlog4j.defaultInitOverride=true
-Dorg.apache.tomcat.util.http.ServerCookie.ALLOW_EQUALS_IN_VALUE=true
-Dorg.apache.tomcat.util.http.ServerCookie.ALLOW_HTTP_SEPARATORS_IN_V0=true
-Dlog4j.defaultInitOverride=false
-Dspas.identity=/home/admin/.spas_key/default
-Daddress.server.port=8080
-Dtomcat.monitor.http.binding.host=10.1.70.219
-Dahas.project.name=None
-Ddpath.id.group=default
-Xloggc:/home/admin/oom/gc.log -XX:NumberOfGCLogFiles=10
-XX:+PrintGCDateStamps -XX:+HeapDumpOnOutOfMemoryError -XX:+UseGCLogFileRotation
-XX:GCLogFileSize=20m -XX:HeapDumpPath=/home/admin/oom/ -XX:+PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
-Darms.appName=XXXX
-Dcatalina.logs=/home/admin/taobao-tomcat-production-7.0.59.3/logs
-Dignore.endorsed.dirs= -classpath /home/admin/taobao-tomcat-production-7.0.59.3/bin/bootstrap.jar:/home/admin/taobao-tomcat-production-7.0.59.3/bin/tomcat-juli.jar -Dcatalina.base=/home/admin/taobao-tomcat-production-7.0.59.3
-Dcatalina.home=/home/admin/taobao-tomcat-production-7.0.59.3
-Djava.io.tmpdir=/home/admin/taobao-tomcat-production-7.0.59.3/temp org.apache.catalina.startup.Bootstrap start
2、server配置
<?xml version='1.0' encoding='utf-8'?>
<Server port="8005" shutdown="TAOBAO-TOMCAT-SHUTDOWN">
<Listener className="org.apache.catalina.core.JasperListener" />
<Listener className="org.apache.catalina.core.JreMemoryLeakPreventionListener" />
<Listener className="org.apache.catalina.mbeans.GlobalResourcesLifecycleListener" />
<Listener className="org.apache.catalina.core.ThreadLocalLeakPreventionListener" />
<Listener className="com.taobao.tomcat.monitor.MonitorServiceListener"/>
<GlobalNamingResources>
<Resource name="UserDatabase" auth="Container"
type="org.apache.catalina.UserDatabase"
description="User database that can be updated and saved"
factory="org.apache.catalina.users.MemoryUserDatabaseFactory"
pathname="conf/tomcat-users.xml" />
</GlobalNamingResources>
<ModuleService name="Pandora" containerClassName="com.taobao.pandora.delegator.PandoraDelegator"
base="deploy" target="pandora.sar" />
<Service name="Catalina">
<Connector port="8080" protocol="HTTP/1.1"
connectionTimeout="15000" redirectPort="8443" maxParameterCount="1000"
maxThreads="400" maxHttpHeaderSize="16384"
maxPostSize="209715200" acceptCount="200" useBodyEncodingForURI="true" URIEncoding="ISO-8859-1" />
<Engine name="Catalina" defaultHost="localhost">
<Realm className="org.apache.catalina.realm.LockOutRealm">
<Realm className="org.apache.catalina.realm.UserDatabaseRealm"
resourceName="UserDatabase"/>
</Realm>
<Host name="localhost" appBase="deploy" unpackWARs="true" autoDeploy="false" deployOnStartup="true"
hostConfigClass="com.taobao.tomcat.container.host.AliHostConfig">
</Host>
</Engine>
</Service>
</Server>
3、配置解析
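从上述配置可以看出,Connector 采取的是 BIO 策略(protocol="HTTP/1.1",在 APR 不可用时对应 Http11Protocol):连接超时时间(connectionTimeout)为 15s;参数最大键值对数量(maxParameterCount)为 1000;同时处理请求的最大线程数(maxThreads),即并发数,为 400;除正在工作的线程外,还可以让 200 个连接(acceptCount)处于等待队列,等待内部的工作线程空闲后再来处理队列中等待的连接;URI 的编码(URIEncoding)为 ISO-8859-1。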
3.1 protocol
从 conf/server.xml 解析出的 ProtocolHandler 为 Http11Protocol,其中的 Endpoint 为 JIoEndpoint,即 BIO。
// Excerpt of Catalina.load(): parses the configuration input with a Digester,
// then initializes the Server returned by getServer().
Catalina.load(){
// Build the Digester that carries the parsing rules (see createStartDigester()).
Digester digester = createStartDigester();
inputSource.setByteStream(inputStream);
// Push this Catalina instance onto the digester before parsing.
digester.push(this);
digester.parse(inputSource);
// Parsing is finished; initialize the Server.
getServer().init();
}
// Register the parsing rules (excerpt: only the rules for Server/Service/Connector are shown).
createStartDigester(){
// Rule 1: ConnectorCreateRule is fired for each matched Connector element.
digester.addRule("Server/Service/Connector",
new ConnectorCreateRule());
// Rule 2: SetAllPropertiesRule, constructed with "executor" in its argument array
// (presumably the attribute names to exclude, since the create rule handles "executor" itself -- TODO confirm).
digester.addRule("Server/Service/Connector",
new SetAllPropertiesRule(new String[]{"executor"}));
// Rule 3: set-next rule naming the "addConnector" method and the Connector type.
digester.addSetNext("Server/Service/Connector",
"addConnector",
"org.apache.catalina.connector.Connector");
}
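通过 protocol 属性动态创建 protocolHandler:ConnectorCreateRule 的 begin() 读取 protocol 属性并构造 Connector,Connector 的构造函数再通过反射实例化对应的 ProtocolHandler。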
/**
 * Digester callback: creates a Connector from the element's "protocol"
 * attribute and pushes it onto the digester.
 * If an "executor" attribute is present, the executor of that name is looked
 * up on the Service at the top of the digester stack and, when found,
 * attached to the new Connector.
 *
 * @param namespace  not used in this excerpt
 * @param name       not used in this excerpt
 * @param attributes attributes of the matched element; "executor" and "protocol" are read
 */
@Override
public void begin(String namespace, String name, Attributes attributes)
throws Exception {
// The object currently on top of the digester is cast to Service.
Service svc = (Service)digester.peek();
Executor ex = null;
if ( attributes.getValue("executor")!=null ) {
ex = svc.getExecutor(attributes.getValue("executor"));
}
// The protocol attribute value (may be null) selects the protocol handler, see Connector(String).
Connector con = new Connector(attributes.getValue("protocol"));
// Attach the executor only when the lookup returned one.
if ( ex != null ) _setExecutor(con,ex);
digester.push(con);
}
/**
 * Creates the connector and instantiates its protocol handler by reflection.
 *
 * @param protocol value of the "protocol" attribute; mapped to a handler
 *                 class name by setProtocol(String)
 */
public Connector(String protocol) {
// Resolve the handler class name for the requested protocol.
setProtocol(protocol);
// Instantiate protocol handler
try {
Class<?> clazz = Class.forName(protocolHandlerClassName);
this.protocolHandler = (ProtocolHandler) clazz.newInstance();
} catch (Exception e) {
// Failure is only logged; this constructor does not assign protocolHandler in that case.
log.error(sm.getString(
"coyoteConnector.protocolHandlerInstantiationFailed"), e);
}
}
/**
 * Maps the configured protocol string to a protocol handler class name.
 * "HTTP/1.1" and "AJP/1.3" are mapped to the APR handlers when
 * AprLifecycleListener.isAprAvailable() returns true, otherwise to
 * Http11Protocol / AjpProtocol. Any other non-null value is used verbatim
 * as the handler class name.
 *
 * @param protocol protocol name or handler class name; may be null
 */
public void setProtocol(String protocol) {
if (AprLifecycleListener.isAprAvailable()) {
if ("HTTP/1.1".equals(protocol)) {
setProtocolHandlerClassName
("org.apache.coyote.http11.Http11AprProtocol");
} else if ("AJP/1.3".equals(protocol)) {
setProtocolHandlerClassName
("org.apache.coyote.ajp.AjpAprProtocol");
} else if (protocol != null) {
setProtocolHandlerClassName(protocol);
} else {
// With APR available, a null protocol falls back to the APR HTTP handler.
setProtocolHandlerClassName
("org.apache.coyote.http11.Http11AprProtocol");
}
} else {
// 7.0.59
if ("HTTP/1.1".equals(protocol)) {
// This is the branch taken by the Connector in the server.xml above when APR is not available.
setProtocolHandlerClassName
("org.apache.coyote.http11.Http11Protocol");
} else if ("AJP/1.3".equals(protocol)) {
setProtocolHandlerClassName
("org.apache.coyote.ajp.AjpProtocol");
} else if (protocol != null) {
setProtocolHandlerClassName(protocol);
}
// Unlike the APR branch there is no else here: a null protocol sets nothing in this method.
}
}
3.2 maxThreads
/**
 * Creates the protocol handler with a JIoEndpoint as its endpoint,
 * registers an Http11ConnectionHandler on that endpoint and applies the
 * default linger, timeout and TCP_NODELAY settings from Constants.
 */
public Http11Protocol() {
// JIoEndpoint is the endpoint implementation used by this handler.
endpoint = new JIoEndpoint();
cHandler = new Http11ConnectionHandler(this);
((JIoEndpoint) endpoint).setHandler(cHandler);
setSoLinger(Constants.DEFAULT_CONNECTION_LINGER);
setSoTimeout(Constants.DEFAULT_CONNECTION_TIMEOUT);
setTcpNoDelay(Constants.DEFAULT_TCP_NO_DELAY);
}
JIoEndpoint 初始化的时候通过 setMaxConnections(0) 将 maxConnections 置为 0,以便在执行 bind() 时判断用户是否在 Connector 上显式配置过该值:
/**
 * Creates the endpoint with maxConnections set to 0 (used as a "not configured"
 * marker that bind() checks) and an executor termination timeout of 0 ms.
 */
public JIoEndpoint() {
// Set maxConnections to zero so we can tell if the user has specified
// their own value on the connector when we reach bind()
setMaxConnections(0);
// Reduce the executor timeout for BIO as threads in keep-alive will not
// terminate when the executor interrupts them.
setExecutorTerminationTimeoutMillis(0);
}
在调用 Bootstrap.start() 方法的时候,最终会调用到 JIoEndpoint 的 bind() 方法。如果用户没有显式配置 maxConnections(值仍为 0),此方法会将 maxConnections 的值设置为与 maxThreads 一样,前提是没有配置外部的 Executor;如果配置了外部 Executor,则取该 Executor 的最大线程数。
/**
 * Binds the endpoint's server socket (excerpt).
 * Defaults acceptorThreadCount to 1 and maxConnections to
 * getMaxThreadsExecutor(true) while they are still 0, selects a server socket
 * factory if none is set, then creates the server socket from the configured
 * port, backlog and (when set) address.
 *
 * @throws Exception a BindException from socket creation is rethrown as a new
 *         BindException whose message has the address and port appended and
 *         whose cause is the original exception
 */
@Override
public void bind() throws Exception {
// Initialize thread count defaults for acceptor
if (acceptorThreadCount == 0) {
acceptorThreadCount = 1;
}
// Initialize maxConnections
if (getMaxConnections() == 0) {
// User hasn't set a value - use the default
// (see getMaxThreadsExecutor: the executor's maximum when one is set, otherwise maxThreads)
setMaxConnections(getMaxThreadsExecutor(true));
}
if (serverSocketFactory == null) {
// Pick the SSL factory from the handler's SSL implementation when SSL is enabled, the default one otherwise.
if (isSSLEnabled()) {
serverSocketFactory =
handler.getSslImplementation().getServerSocketFactory(this);
} else {
serverSocketFactory = new DefaultServerSocketFactory(this);
}
}
if (serverSocket == null) {
try {
// getBacklog() carries the Connector's acceptCount value (acceptCount is mapped to backlog in Connector).
if (getAddress() == null) {
serverSocket = serverSocketFactory.createSocket(getPort(),
getBacklog());
} else {
serverSocket = serverSocketFactory.createSocket(getPort(),
getBacklog(), getAddress());
}
} catch (BindException orig) {
// Rebuild the exception so the message names the address:port that failed to bind.
String msg;
if (getAddress() == null)
msg = orig.getMessage() + " <null>:" + getPort();
else
msg = orig.getMessage() + " " +
getAddress().toString() + ":" + getPort();
BindException be = new BindException(msg);
be.initCause(orig);
throw be;
}
}
}
/**
 * Returns the maximum number of worker threads.
 *
 * @param useExecutor when true and an executor is set, report the executor's
 *                    maximum instead of the endpoint's own maxThreads
 * @return the executor's maximum pool size / max threads when it is consulted,
 *         -1 when the executor is of neither recognized type,
 *         otherwise the maxThreads field
 */
protected int getMaxThreadsExecutor(boolean useExecutor) {
if (useExecutor && executor != null) {
if (executor instanceof java.util.concurrent.ThreadPoolExecutor) {
return ((java.util.concurrent.ThreadPoolExecutor)executor).getMaximumPoolSize();
} else if (executor instanceof ResizableExecutor) {
return ((ResizableExecutor)executor).getMaxThreads();
} else {
// Unknown executor type: its size cannot be determined.
return -1;
}
} else {
return maxThreads;
}
}
请求进入入口为JIoEndpoint#Acceptor.run()
/**
 * Acceptor loop (excerpt): while the endpoint is running, accepts a socket
 * and hands it to processSocket(). When the socket is not handed off
 * successfully, the connection count is decremented and the socket is closed.
 */
@Override
public void run() {
// NOTE(review): errorDelay is declared but never used in this excerpt; the code using it is presumably elided.
int errorDelay = 0;
while (running) {
// Acquire a connection (presumably waits here once the connection limit is reached -- see LimitLatch; TODO confirm)
countUpOrAwaitConnection();
Socket socket = null;
socket = serverSocketFactory.acceptSocket(serverSocket);
if (running && !paused && setSocketOptions(socket)) {
// Hand this socket off to an appropriate processor
// Submit the request for processing
if (!processSocket(socket)) {
// Hand-off failed: give the connection slot back.
countDownConnection();
// Close socket right away
closeSocket(socket);
}
} else {
// Not running, paused, or socket options could not be set: give the slot back.
countDownConnection();
// Close socket right away
closeSocket(socket);
}
}
}
LimitLatch 内部基于 AQS(AbstractQueuedSynchronizer)维护了一个等待队列:当连接计数大于 limit(即上文的 maxConnections)的时候,tryAcquireShared 返回 -1,获取方会进入队列等待
/**
 * Excerpt of LimitLatch: a counting latch built on AbstractQueuedSynchronizer.
 * The count, limit and released members used below are not declared in this excerpt.
 */
public class LimitLatch {
private class Sync extends AbstractQueuedSynchronizer {
private static final long serialVersionUID = 1L;
public Sync() {
}
/**
 * Increments the count first, then checks it against the limit.
 *
 * @return -1 when the latch has not been released and the new count
 *         exceeds the limit (the increment is undone), 1 otherwise
 */
@Override
protected int tryAcquireShared(int ignored) {
long newCount = count.incrementAndGet();
if (!released && newCount > limit) {
// Limit exceeded
// Undo the optimistic increment before reporting failure.
count.decrementAndGet();
return -1;
} else {
return 1;
}
}
/**
 * Decrements the count and always reports success.
 */
@Override
protected boolean tryReleaseShared(int arg) {
count.decrementAndGet();
return true;
}
}
}
3.3 acceptCount
acceptCount 最终会作为 backlog 参数传给 ServerSocket。JDK 代码对该参数的解释:backlog – requested maximum length of the queue of incoming connections.
1) 初始化
/**
 * Excerpt of Connector showing how attribute names are translated before
 * being applied to the protocol handler.
 */
public class Connector extends LifecycleMBeanBase {
protected ProtocolHandler protocolHandler = null;
// Maps a Connector attribute name to the property name set on the protocol handler.
protected static HashMap<String,String> replacements =
new HashMap<String,String>();
static {
// acceptCount in server.xml is applied as the handler's "backlog" property.
replacements.put("acceptCount", "backlog");
replacements.put("connectionLinger", "soLinger");
replacements.put("connectionTimeout", "soTimeout");
replacements.put("rootFile", "rootfile");
}
/**
 * Sets a property on the protocol handler, translating the name through
 * the replacements map when an entry exists.
 *
 * @param name  attribute name as configured
 * @param value attribute value
 * @return the result of IntrospectionUtils.setProperty
 */
public boolean setProperty(String name, String value) {
String repl = name;
if (replacements.get(name) != null) {
repl = replacements.get(name);
}
return IntrospectionUtils.setProperty(protocolHandler, repl, value);
}
}
// AbstractProtocol
// Delegates the backlog value to the endpoint.
public void setBacklog(int backlog) { endpoint.setBacklog(backlog); }
// AbstractEndpoint
// Stores the value only when it is positive; zero or negative input leaves the current backlog unchanged.
public void setBacklog(int backlog) { if (backlog > 0) this.backlog = backlog; }
2)bind
// JIoEndpoint
// Condensed view of bind(): the backlog is passed to the server socket factory when the socket is created.
public void bind() throws Exception {
serverSocket = serverSocketFactory.createSocket(getPort(),getBacklog(), getAddress());
}
//DefaultServerSocketFactory
/**
 * Creates a plain ServerSocket on the given port with the given backlog.
 *
 * @param port    port passed to the ServerSocket constructor
 * @param backlog backlog passed unchanged to the ServerSocket constructor
 * @return the new ServerSocket
 * @throws IOException declared by this method; presumably raised when the socket cannot be opened
 */
@Override
public ServerSocket createSocket (int port, int backlog)
throws IOException {
return new ServerSocket (port, backlog);
}
3) 参数解释
// ServerSocket
/**
*Params:
* port – the port number, or 0 to use a port number that is automatically allocated.
* backlog – requested maximum length of the queue of incoming connections.
*/
public ServerSocket(int port, int backlog) throws IOException {
// Delegates to the three-argument constructor, passing null as the third argument.
this(port, backlog, null);
}
4、日志接入
日志路径:
/home/admin/logs/thread.log
/home/admin/logs/gc.log
日志采集配置:thread、gc
解析规则:
thread:
# 原始日志
2022-05-24 14:57:00|3|22305,http-bio-8080-exec-114,TIMED_WAITING|36113842,1,173928445492,150930000000
# 解析规则
(?<time>[\d]+-[\d]+-[\d]+\s+[\d]+:[\d]+:[\d]+)\|\d+\|(?<pid>\d+),(?<thread>[\w|-]+),(?<status>[\w|_]+)\|(?<f1>\d+),(?<f2>\d+),(?<f3>\d+),(?<f4>\d+)
gc:
2022-04-12 10:46:00|3|9214,gc,PS Scavenge|8,94,83554,929103
(?<time>[\d]+-[\d]+-[\d]+\s+[\d]+:[\d]+:[\d]+)\|(?<statId>\d+)\|(?<thread>\d+),gc,(?<collector>[\w|\s]+)\|(?<gcCount>\d+),(?<gcMilliesecond>\d+),(?<gcCounts>\d+),(?<gcMillieseconds>\d+)
数据集:thread、gc
5、打分计算
统一按照1min粒度计算
5.1 线程
实时获取线程信息时,按线程名前缀筛选 Connector 的内部工作线程。该前缀是 AbstractProtocol.init() 初始化的时候,通过 String endpointName = getName(); 获取到的。以下是 http 协议在各 IO 模式下的线程名前缀,用于统计每一分钟的连接数:
- BIO:http-bio
- NIO:http-nio
- APR:http-apr
健康度打分计算
-- For each host / pid / `time` value: count the http-bio threads and map the count to a 0-100 health score.
select __ip__ as host
, `time`, pid, count(thread) as cnt
-- score = 100 when cnt is within [0, 5]; 0 when cnt > 400 (400 equals maxThreads in the Connector config);
-- otherwise round(cos((cnt-5)/400 * pi/2)^6 * 100), which falls from 100 towards 0 as cnt approaches 400.
, multiIf(cnt between 0 and 5, 100, cnt > 400 , 0, round(pow(cos((cnt-5)/400 * pi() / 2),6) * 100) ) as score
from thread
-- Only threads whose name starts with the BIO prefix.
where thread like 'http-bio%'
and _at_date_ = toDate(now(),'UTC')
-- Only rows from the last 3 minutes.
and toDateTime(`time`) BETWEEN addMinutes(now(),-3) and now()
group by `time`,pid,host
order by toDateTime(`time`) desc
5.2 GC
X:最近一分钟FULLGC花费的时间,单位为秒
SHSNC_DCOS_GCINFO_CHECK里面的FULLGC对应的类型值为MarkSweep的数据
并且该表里面的fullGC时间是累计fullGC总时间,单位为毫秒,需要和上一个时间点的
数据相减,才能得出最近一分钟FULLGC的耗时
k:权重,设置gc敏感程度。打分公式为 score = round(cos(X/60 × π/2)^k × 100),X 超过 60 秒时记 0 分
k=40 敏感的接口一类,fgc超过10秒以上会有业务影响(每分钟FGC时间达到7秒时打分开始低于60分)
k=4 默认值,无特殊标记的都用这个权重 fgc超过20秒以上会有业务影响(每分钟FGC时间达到19秒时打分开始低于60分)
k=1 不怎么敏感的一类 (每分钟FGC时间达到36秒时打分开始低于60分)
健康度打分计算
-- For each `time` / pid: total GC time and a 0-100 health score.
SELECT `time`
, thread as pid
-- gcMilliesecond is divided by 1000 below, so it is presumably in milliseconds.
, sum(gcMilliesecond) as cost
-- score = round(cos(seconds/60 * pi/2)^4 * 100) while seconds is within [0, 60], otherwise 0.
-- The exponent 4 is the default weight k described in the text.
, multiIf(sum(gcMilliesecond)/1000 between 0 and 60, round(pow(cos(sum(gcMilliesecond)/1000/60 * pi()/2), 4) * 100), 0) as score
-- NOTE(review): the data set defined in section 4 is named `gc`, but this query reads from `jvm` -- confirm the table name.
from jvm
WHERE _at_date_ = toDate(now(),'UTC')
-- Only rows from the last 3 minutes.
and toDateTime(`_at_timestamp_`,'Asia/Shanghai') BETWEEN addMinutes(now(),-3) and now()
-- NOTE(review): the text defines X as full-GC time (MarkSweep), yet 'PS Scavenge' is also included here -- confirm this is intended.
and collector in('PS MarkSweep','PS Scavenge')
group by `time`, pid