Hive HA配置手把手详解-基于Haproxy配置cdh集群的HiveServer2负载均衡

最新推荐文章于 2022-07-21 22:18:05 发布

果木

最新推荐文章于 2022-07-21 22:18:05 发布

阅读量1.5k

点赞数

分类专栏：大数据文章标签： hadoop hive Ha

本文链接：https://blog.csdn.net/xiaohai798/article/details/113886237

版权

大数据专栏收录该内容

15 篇文章 1 订阅

订阅专栏

背景需求

hive 在离线分析，数仓中是最常用的工具，业务人员不需要熟悉mr，编程基础就可以通过sql操作数据。hive 的性能及稳定性在生产中必须保证。hive可以通过HAPROXY进行多节点负载均衡来达到目的。、

近期现场环境cdh hadoop平台nn1 所在节点机器故障，由于hdfs做了HA，namenode自动切换到了nn2，但是hive sql任务配置的连接都是nn1:10000 ，所以任务都出了问题。故决定启用hive Ha

前提环境准备

可用的基于cdh搭建的hadoop集群，并且Hdfs 、yarn、hive等安装完毕可用，cloudera manager可用,hdfsHa可用

进行配置

进入hive服务配置，添加两个及以上的hive metastore和server

直接安装

报错

改用 yum -y install haproxy --nogpgcheck 命令：

修改配置

/etc/haproxy/haproxy.cfg ，原始文件：

#---------------------------------------------------------------------
# Example configuration for a possible web application.  See the
# full configuration options online.
#
#   http://haproxy.1wt.eu/download/1.4/doc/configuration.txt
#
#---------------------------------------------------------------------

#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
    # to have these messages end up in /var/log/haproxy.log you will
    # need to:
    #
    # 1) configure syslog to accept network log events.  This is done
    #    by adding the '-r' option to the SYSLOGD_OPTIONS in
    #    /etc/sysconfig/syslog
    #
    # 2) configure local2 events to go to the /var/log/haproxy.log
    #   file. A line like the following can be added to
    #   /etc/sysconfig/syslog
    #
    #    local2.*                       /var/log/haproxy.log
    #
    log         127.0.0.1 local2

    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4000
    user        haproxy
    group       haproxy
    daemon

    # turn on stats unix socket
    stats socket /var/lib/haproxy/stats

#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000

#---------------------------------------------------------------------
# main frontend which proxys to the backends
#---------------------------------------------------------------------
frontend  main *:5000
    acl url_static       path_beg       -i /static /images /javascript /stylesheets
    acl url_static       path_end       -i .jpg .gif .png .css .js

    use_backend static          if url_static
    default_backend             app

#---------------------------------------------------------------------
# static backend for serving up images, stylesheets and such
#---------------------------------------------------------------------
backend static
    balance     roundrobin
    server      static 127.0.0.1:4331 check

#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend app
    balance     roundrobin
    server  app1 127.0.0.1:5001 check
    server  app2 127.0.0.1:5002 check
    server  app3 127.0.0.1:5003 check
    server  app4 127.0.0.1:5004 check

修改后：

#---------------------------------------------------------------------
# Example configuration for a possible web application.  See the
# full configuration options online.
#
#   http://haproxy.1wt.eu/download/1.4/doc/configuration.txt
#
#---------------------------------------------------------------------

#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
    # to have these messages end up in /var/log/haproxy.log you will
    # need to:
    #
    # 1) configure syslog to accept network log events.  This is done
    #    by adding the '-r' option to the SYSLOGD_OPTIONS in
    #    /etc/sysconfig/syslog
    #
    # 2) configure local2 events to go to the /var/log/haproxy.log
    #   file. A line like the following can be added to
    #   /etc/sysconfig/syslog
    #
    #    local2.*                       /var/log/haproxy.log
    #
    log         127.0.0.1 local2

    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     2000
    user        haproxy
    group       haproxy
    daemon

    # turn on stats unix socket
    stats socket /var/lib/haproxy/stats

#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         1d
    timeout client          1d
    timeout server          1d
    timeout http-keep-alive 10s
    timeout check           100s
    maxconn                 1000


listen status#定义管理界面
    bind bd1.bcht:1080#管理界面访问IP和端口
    mode http#管理界面所使用的协议
    option httplog
    maxconn 50#最大连接数
    stats refresh 30s#30秒自动刷新
    stats  uri /stats

listen hive
    bind bd1.bcht:10001#ha作为proxy所绑定的IP和端口
    mode tcp#以4层方式代理，重要
    option tcplog
    balance source#调度算法 'leastconn' 最少连接数分配，或者 'roundrobin'，轮询分
    server hiveServer1 bd1.bcht:10000 check
    server hiveServer2 bd2.bcht:10000 check

#---------------------------------------------------------------------
# main frontend which proxys to the backends
#---------------------------------------------------------------------
frontend  main *:5000
    acl url_static       path_beg       -i /static /images /javascript /stylesheets
    acl url_static       path_end       -i .jpg .gif .png .css .js

    use_backend static          if url_static
    default_backend             app

#---------------------------------------------------------------------
# static backend for serving up images, stylesheets and such
#---------------------------------------------------------------------
backend static
    balance     roundrobin
    server      static 127.0.0.1:4331 check

#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend app
    balance     roundrobin
    server  app1 127.0.0.1:5001 check
    server  app2 127.0.0.1:5002 check
    server  app3 127.0.0.1:5003 check
    server  app4 127.0.0.1:5004 check

注意修改的参数：

global 里面的maxconn 即允许的最大连接数