一 监控服务部分
上传并解压安装包(先将 monitor.tar.gz 上传到 /usr 目录):
mkdir -p /usr
cd /usr
tar zxvf monitor.tar.gz
监控服务相关
\cp -rf /usr/monitor/managers/service.d/* /etc/systemd/system/
systemctl daemon-reload
编辑 /usr/monitor/managers/prometheus-2.27.1.linux-amd64/config 目录下的配置文件,添加或修改各监控目标对应的 IP。
启动监控
systemctl restart grafana
systemctl restart prometheus
systemctl restart alertmanager
grafana 重置密码(下面的 SQL 通常会把 admin 用户的密码重置为 admin,执行后请登录确认并尽快修改):
sqlite3 /usr/monitor/managers/grafana-7.5.7/data/grafana.db
update user set password = '59acf18b94d7eb0694c61e60ce44c110c7a683ac6a8f09580d626f90f4a242000746579358d77dd9e570e83fa24faa88a8a6', salt = 'F3FAxVm33R' where login = 'admin';
.exit
或者使用 grafana-cli 重置(该方式不一定生效):
/usr/monitor/managers/grafana-7.5.7/bin/grafana-cli admin reset-admin-password admin
登录grafana
修改数据源prometheus对应连接串
二 监控采集配置
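分发(将 collectors 目录分发到所有节点。注意:copy 模块的 src 末尾不带 "/" 时,会把目录本身复制到 dest 之下;执行后请确认文件最终位于 /usr/monitor/collectors,而不是 /usr/monitor/collectors/collectors):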
ansible all -m copy -a "src=/usr/monitor/collectors dest=/usr/monitor/collectors "
ansible all -m shell -a "\cp -rf /usr/monitor/collectors/service.d/* /etc/systemd/system/"
ansible all -m shell -a "systemctl daemon-reload"
ansible all -m shell -a "systemctl restart node_exporter"
三 修改JVM
Hdfs
1、env添加:
export HADOOP_NAMENODE_JMX_OPTS=" -javaagent:/usr/monitor/collectors/jmx_prometheus_javaagent-0.15.0.jar=11101:/usr/monitor/collectors/conf/config.yaml "
export HADOOP_DATANODE_JMX_OPTS=" -javaagent:/usr/monitor/collectors/jmx_prometheus_javaagent-0.15.0.jar=11102:/usr/monitor/collectors/conf/config.yaml "
2、在 $HADOOP_HOME/bin/hdfs 中,于对应的 HADOOP_OPTS 后面加上 $HADOOP_NAMENODE_JMX_OPTS 或 $HADOOP_DATANODE_JMX_OPTS。
例:
if [ "$COMMAND" = "namenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS $HADOOP_NAMENODE_JMX_OPTS"
...
elif [ "$COMMAND" = "datanode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
if [ "$starting_secure_dn" = "true" ]; then
HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS $HADOOP_DATANODE_JMX_OPTS"
else
HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS $HADOOP_DATANODE_JMX_OPTS"
Yarn
1、env添加:
export HADOOP_RESOURCEMANAGER_JMX_OPTS=" -javaagent:/usr/monitor/collectors/jmx_prometheus_javaagent-0.15.0.jar=11103:/usr/monitor/collectors/conf/config.yaml"
export HADOOP_NODEMANAGER_JMX_OPTS=" -javaagent:/usr/monitor/collectors/jmx_prometheus_javaagent-0.15.0.jar=11104:/usr/monitor/collectors/conf/config.yaml "
2、在 $HADOOP_HOME/bin/yarn 中,于对应的 YARN_OPTS 后面加上 $HADOOP_RESOURCEMANAGER_JMX_OPTS 或 $HADOOP_NODEMANAGER_JMX_OPTS。
例:
elif [ "$COMMAND" = "resourcemanager" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/rm-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
YARN_OPTS="$YARN_OPTS $YARN_RESOURCEMANAGER_OPTS $HADOOP_RESOURCEMANAGER_JMX_OPTS"
...
elif [ "$COMMAND" = "nodemanager" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS $HADOOP_NODEMANAGER_JMX_OPTS"
Hive(hiveserver2 / metastore)
env添加:
if [ "$SERVICE" = "hiveserver2" ]; then
export HADOOP_OPTS="$HADOOP_OPTS -javaagent:/usr/monitor/collectors/jmx_prometheus_javaagent-0.15.0.jar=11105:/usr/monitor/collectors/conf/config.yaml "
fi
if [ "$SERVICE" = "metastore" ]; then
export HADOOP_OPTS="$HADOOP_OPTS -javaagent:/usr/monitor/collectors/jmx_prometheus_javaagent-0.15.0.jar=11106:/usr/monitor/collectors/conf/config.yaml "
fi
hbase
env添加:
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -javaagent:/usr/monitor/collectors/jmx_prometheus_javaagent-0.15.0.jar=11107:/usr/monitor/collectors/conf/config.hbase.yaml "
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -javaagent:/usr/monitor/collectors/jmx_prometheus_javaagent-0.15.0.jar=11108:/usr/monitor/collectors/conf/config.hbase.yaml "
kafka
env添加:
export KAFKA_OPTS="-javaagent:/usr/monitor/collectors/jmx_prometheus_javaagent-0.15.0.jar=11109:/usr/monitor/collectors/conf/kafka-2_0_0.yaml"
四 配置短信告警
编辑 /usr/monitor/managers/alertmanager-0.22.2.linux-amd64/alertmanager.yml,修改 webhook 配置下的 ip:port。
修改 prometheus rules 后重新加载配置(需要 Prometheus 以 --web.enable-lifecycle 参数启动,端口以实际配置为准):
curl -X POST http://localhost:9092/-/reload