Install Dependencies
for root user
Install Rpms
yum install -y libevent-devel automake libtool flex bison gcc-c++ openssl-devel make cmake doxygen glib-devel python-devel bzip2-devel svn libevent-devel cyrus-sasl-devel wget git unzip openldap-devel db4-devel lsb
Install pip
cd /tmp
wget https://bootstrap.pypa.io/get-pip.py --no-check-certificate
python get-pip.py
Build Environment
for hadoop user
Build Dirs Tree
[hadoop@localhost ~]$ tree -L 2 /opt/beh/
/opt/beh/
├── buildenv
│ ├── beh_env
│ └── impala_env
└── core
├── impala -> impala-2.5.0-cdh5.7.1
├── impala-2.5.0-cdh5.7.1
├── jdk -> jdk1.7.0_79
├── jdk1.7.0_79
├── maven -> apache-maven-3.3.9
└── apache-maven-3.3.9
7 directories, 2 files
impala_env
[hadoop@localhost ~]$ cat /opt/beh/buildenv/impala_env
# Build-time environment for the Impala compile (sourced before buildall.sh).
export LANG=zh_CN.UTF-8
export BEH_HOME=/opt/beh
export JAVA_HOME=/opt/beh/core/jdk
export MVN_HOME=/opt/beh/core/maven
# IMPALA_HOME points at the symlink /opt/beh/core/impala -> impala-2.5.0-cdh5.7.1
export IMPALA_HOME=$BEH_HOME/core/impala
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$MVN_HOME/bin:$PATH
Build
cd /opt/beh/core/impala
source /opt/beh/buildenv/impala_env
source bin/impala-config.sh
${IMPALA_HOME}/buildall.sh -noclean -skiptests -so -release
BUILD SUCCESS
MOVE THE IMPALA TO HADOOP CLUSTER
MODIFY Environment
Modify Cluster Environment
Modify env
- MODIFY /opt/beh/conf/beh_env
#IMPALA 2.5.0
export IMPALA_HOME=$BEH_HOME/core/impala
export IMPALA_CONF_DIR=$IMPALA_HOME/conf
# Used by testdata/cluster/admin when the start scripts invoke it.
export CLUSTER_DIR=/opt/beh/core/impala/testdata/cluster
# Native libs: copied impala .so files, hadoop native libs, and the JVM server lib.
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$IMPALA_HOME/so/:/opt/beh/core/hadoop/lib/native:/opt/beh/core/jdk/jre/lib/amd64/server
export PATH=$IMPALA_HOME/bin:$PATH
Modify hdfs
- modify /opt/beh/core/hadoop/etc/hadoop/hdfs-site.xml
<!-- Let the HDFS client read local blocks directly (short-circuit reads). -->
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<!-- Keep checksum verification on for short-circuit reads. -->
<property>
<name>dfs.client.read.shortcircuit.skip.checksum</name>
<value>false</value>
</property>
<!-- Unix domain socket used by DataNode/client for short-circuit reads;
     the parent directory must exist and be writable. -->
<property>
<name>dfs.domain.socket.path</name>
<value>/opt/beh/data/domain/sc_socket</value>
</property>
<property>
<name>dfs.datanode.data.dir.perm</name>
<value>755</value>
</property>
<!-- The user Impala runs as (hadoop) needs local block path access. -->
<property>
<name>dfs.block.local-path-access.user</name>
<value>hadoop</value>
</property>
<!-- Block-metadata API that impalad uses for scheduling local reads. -->
<property>
<name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
<value>true</value>
</property>
<!-- Timeout (ms) for block storage-location lookups. -->
<property>
<name>dfs.client.file-block-storage-locations.timeout</name>
<value>10000</value>
</property>
- restart hdfs
Modify impala
# Link the Hadoop/Hive client configs into Impala's conf dir and collect
# all built shared libraries into ./so for LD_LIBRARY_PATH.
cd /opt/beh/core/impala
mkdir -p conf                 # -p: idempotent if re-run
cd conf
ln -s /opt/beh/core/hadoop/etc/hadoop/core-site.xml ./
ln -s /opt/beh/core/hadoop/etc/hadoop/hdfs-site.xml ./
ln -s /opt/beh/core/hadoop/etc/hadoop/yarn-site.xml ./
ln -s /opt/beh/core/hive/conf/hive-site.xml ./
cd /opt/beh/core/impala
mkdir -p so
# -exec instead of the deprecated `xargs -i`; safe for paths with spaces.
find . -name '*.so' -exec cp -- {} ./so/ \;
cd so
# impalad links against libstdc++.so.6; provide the expected soname.
ln -s libstdc++.so libstdc++.so.6
- modify bin/set-classpath.sh
[hadoop@localhost impala]$ cat bin/set-classpath.sh
#!/bin/bash
# Build the Impala frontend CLASSPATH: local conf dir, Maven build outputs,
# and the DataNucleus jars shipped with Hive. Sourced by the start scripts.
CLASSPATH=\
$IMPALA_HOME/conf:\
$IMPALA_HOME/fe/src/test/resources:\
$IMPALA_HOME/fe/target/classes:\
$IMPALA_HOME/fe/target/dependency:\
$IMPALA_HOME/fe/target/test-classes:\
${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:

# Append every dependency jar. Glob directly instead of parsing `ls`:
# no subprocess, handles paths with spaces, and the -e guard skips the
# literal pattern when the directory has no jars.
for jar in "${IMPALA_HOME}"/fe/target/dependency/*.jar; do
  [ -e "$jar" ] || continue
  CLASSPATH=${CLASSPATH}:$jar
done
# Native libs for impalad: copied impala .so files, hadoop native, JVM server lib.
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$IMPALA_HOME/so/:/opt/beh/core/hadoop/lib/native:/opt/beh/core/jdk/jre/lib/amd64/server
#for jar in "${IMPALA_HOME}"/testdata/target/dependency/*.jar; do
# CLASSPATH=${CLASSPATH}:$jar
#done
export CLASSPATH
-
Modify bin/start-statestored.sh: insert the following as the second-to-last line.
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$IMPALA_HOME/so/
-
ln -s /opt/beh/core/hive/lib/mysql-connector-java-5.1.31.jar $IMPALA_HOME/fe/target/dependency/
-
source /opt/beh/conf/beh_env
modify script
create impala conf file
vim $IMPALA_HOME/conf/impala
# Cluster layout: catalogd and statestored both run on hadoop003.
IMPALA_CATALOG_SERVICE_HOST=hadoop003
IMPALA_STATE_STORE_HOST=hadoop003
IMPALA_STATE_STORE_PORT=24000
IMPALA_BACKEND_PORT=22000
IMPALA_LOG_DIR=/opt/beh/logs/impala
# Flags consumed by start-catalogd.sh (sourced there as CATALOGD_ARGS default).
IMPALA_CATALOG_ARGS=" -log_dir=${IMPALA_LOG_DIR} \
-load_catalog_in_background=true"
# Flags consumed by start-statestored.sh (STATESTORED_ARGS default).
IMPALA_STATE_STORE_ARGS=" -log_dir=${IMPALA_LOG_DIR} \
-state_store_port=${IMPALA_STATE_STORE_PORT}"
# Flags consumed by start-impalad.sh (IMPALAD_ARGS default).
IMPALA_SERVER_ARGS=" \
-log_dir=${IMPALA_LOG_DIR} \
-catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST} \
-state_store_port=${IMPALA_STATE_STORE_PORT} \
-use_statestore \
-state_store_host=${IMPALA_STATE_STORE_HOST} \
-mem_limit=70% \
-default_pool_max_requests=-1 \
-be_port=${IMPALA_BACKEND_PORT} "
ENABLE_CORE_DUMPS=false
modify start-statestored.sh
vim $IMPALA_HOME/bin/start-statestored.sh
...
BUILD_TYPE=latest
#STATESTORED_ARGS=""
source $IMPALA_HOME/conf/impala
STATESTORED_ARGS=${STATESTORED_ARGS:-$IMPALA_STATE_STORE_ARGS}
BINARY_BASE_DIR=${IMPALA_HOME}/be/build
...
modify start-catalogd.sh
vim $IMPALA_HOME/bin/start-catalogd.sh
...
BUILD_TYPE=latest
#CATALOGD_ARGS=""
source $IMPALA_HOME/conf/impala
CATALOGD_ARGS=${CATALOGD_ARGS:-$IMPALA_CATALOG_ARGS}
BINARY_BASE_DIR=${IMPALA_HOME}/be/build
...
modify start-impalad.sh
vim $IMPALA_HOME/bin/start-impalad.sh
...
BUILD_TYPE=latest
#IMPALAD_ARGS=""
source $IMPALA_HOME/conf/impala
IMPALAD_ARGS=${IMPALAD_ARGS:-$IMPALA_SERVER_ARGS}
BINARY_BASE_DIR=${IMPALA_HOME}/be/build
...
start impala
-
cd /opt/beh/core/impala
-
- start the statestored (only one instance in the cluster)
./bin/start-statestored.sh
- start the catalogd , only one in the cluster
./bin/start-catalogd.sh
- start the impalad (one instance on every datanode)
./bin/start-impalad.sh -state_store_host={ip address of statestore}
- connect impala
./bin/impala-shell.sh -i {ip address of one impalad}
#impala安装记录
#问题记录
##错误一
错误描述:impala-2.7.0-cdh5.9.0启动相关服务的时候,总是加载HDFS schema为localhost:20500,无法获取正确的HDFS路径。暂无解决办法。
解决办法:修改版本为impala-2.5.0-cdh5.7.1进行编译安装,无任何报错。
##错误二 错误描述:
[hadoop@hadoop004 impala]$ ./bin/impala-shell.sh --help
Traceback (most recent call last):
File "/opt/beh/core/impala/infra/python/bootstrap_virtualenv.py", line 279, in <module>
kudu_client_dir = find_kudu_client_install_dir()
File "/opt/beh/core/impala/infra/python/bootstrap_virtualenv.py", line 210, in find_kudu_client_install_dir
custom_client_dir = os.environ["KUDU_CLIENT_DIR"]
File "/usr/lib64/python2.6/UserDict.py", line 22, in __getitem__
raise KeyError(key)
KeyError: 'KUDU_CLIENT_DIR'
Error in /opt/beh/core/impala/bin/impala-python at line 25:
错误为检查是否存在kudu,暂不测试kudu,故注释相关检查命令
解决办法:
[hadoop@hadoop003 impala]vim ${IMPALA_HOME}/infra/python/bootstrap_virtualenv.py
if __name__ == "__main__":
  # Entry point of infra/python/bootstrap_virtualenv.py with the Kudu client
  # checks commented out (Kudu is not used here, and the check fails with
  # KeyError: 'KUDU_CLIENT_DIR'). Indentation restored to valid Python.
  parser = optparse.OptionParser()
  parser.add_option("-l", "--log-level", default="INFO",
      choices=("DEBUG", "INFO", "WARN", "ERROR"))
  parser.add_option("-r", "--rebuild", action="store_true", help="Force a rebuild of"
      " the virtualenv even if it exists and appears to be completely up-to-date.")
  parser.add_option("--print-ld-library-path", action="store_true", help="Print the"
      " LD_LIBRARY_PATH that should be used when running python from the virtualenv.")
  options, args = parser.parse_args()
  # if options.print_ld_library_path:
  #   kudu_client_dir = find_kudu_client_install_dir()
  #   print os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'),
  #       os.path.join(kudu_client_dir, 'lib64')])
  #   sys.exit()
  #
  logging.basicConfig(level=getattr(logging, options.log_level))
  if options.rebuild:
    delete_virtualenv_if_exist()
  setup_virtualenv_if_not_exists()
  # install_kudu_client_if_possible()
##错误三 错误描述:
[hadoop@hadoop003 impala]$ ./bin/start-catalogd.sh --help > /tmp/start-catalogd.sh
/opt/beh/core/impala/testdata/cluster/admin: line 46: CDH_MAJOR_VERSION: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 57: TARGET_FILESYSTEM: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 67: KUDU_IS_SUPPORTED: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 97: IMPALA_CLUSTER_LOGS_DIR: unbound variable
/opt/beh/core/impala/testdata/cluster/admin: line 87: IS_OSX: unbound variable
解决办法:
[hadoop@hadoop003 impala]$ vim ${IMPALA_HOME}/testdata/cluster/admin
'''
'''
# Defaults for variables the admin script dereferences under `set -u`;
# without them it aborts with "unbound variable" errors.
IS_OSX=false
IMPALA_CLUSTER_LOGS_DIR=$IMPALA_HOME/logs/cluster
KUDU_IS_SUPPORTED=false
TARGET_FILESYSTEM=hdfs
CDH_MAJOR_VERSION=5
DIR=$(dirname "$0")   # quote $0: the script path may contain spaces
NODES_DIR="$DIR/cdh$CDH_MAJOR_VERSION"
NODE_COUNT=3
NODE_PREFIX=node-
'''
'''
#对于alluxio的读写支持
vim $IMPALA_HOME/bin/set-classpath.sh
...
# Same CLASSPATH prefix as bin/set-classpath.sh, with the Alluxio client
# jar appended so Impala can read and write alluxio:// paths.
CLASSPATH=\
$IMPALA_HOME/conf:\
$IMPALA_HOME/fe/src/test/resources:\
$IMPALA_HOME/fe/target/classes:\
$IMPALA_HOME/fe/target/dependency:\
$IMPALA_HOME/fe/target/test-classes:\
${HIVE_HOME}/lib/datanucleus-api-jdo-3.2.1.jar:\
${HIVE_HOME}/lib/datanucleus-core-3.2.2.jar:\
${HIVE_HOME}/lib/datanucleus-rdbms-3.2.1.jar:\
${HIVE_HOME}/lib/alluxio-core-client-1.2.0-jar-with-dependencies.jar:
...