准备
参考
配置HDFS
Alluxio集成HDFS作为底层存储 – Alluxio官网 | 分布式超大规模数据编排系统
在具有HA的群集上部署Alluxio - Alluxio v2.9.3 (stable) Documentation
运行集群
在集群上独立运行Alluxio – Alluxio官网 | 分布式超大规模数据编排系统
编译
编译Alluxio源代码 - Alluxio v2.9.3 (stable) Documentation
调大maven资源
export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
编译CDH特定版本
mvn -T 4C clean install -Phadoop-2 -Dhadoop.version=2.6.0-cdh5.10.0 -DskipTests
快速编译CDH特定版本
mvn -T 4C clean install \
-Dmaven.javadoc.skip=true -DskipTests \
-Dlicense.skip=true -Dcheckstyle.skip=true -Dfindbugs.skip=true \
-Pufs-hadoop-2 -Dufs.hadoop.version=2.6.0-cdh5.10.0
编译安装包
./dev/scripts/generate-tarballs single -hadoop-distribution=2.6.0-cdh5.10.0 -mvn-args "-Pufs-hadoop-2,-Dhadoop.version=2.6.0-cdh5.10.0,-DskipTests,-Dmaven.javadoc.skip,-Dfindbugs.skip,-Dcheckstyle.skip,-Dlicense.skip,-Dchecksum.skip"
步骤
1.配置
1.1 配置alluxio-site.properties
cp conf/alluxio-site.properties.template conf/alluxio-site.properties
# Common properties
#alluxio.master.hostname=
#alluxio.worker.hostname=
alluxio.locality.script=/opt/bigdata/alluxio/conf/alluxio-locality.sh
alluxio.zookeeper.enabled=true
alluxio.zookeeper.address=
alluxio.zookeeper.election.path=/alluxio/election
alluxio.zookeeper.leader.path=/alluxio/leader
alluxio.zookeeper.job.election.path=/alluxio/job_election
alluxio.zookeeper.job.leader.path=/alluxio/job_leader
alluxio.zookeeper.connection.timeout=30000
alluxio.zookeeper.session.timeout=120000
alluxio.work.dir=/var/alluxio
alluxio.logs.dir=/var/log/alluxio
alluxio.user.logs.dir=/var/log/alluxio
alluxio.master.journal.type=UFS
alluxio.master.journal.folder=hdfs://pabfs/alluxio/journal
alluxio.master.mount.table.root.alluxio=/
alluxio.master.mount.table.root.ufs=hdfs://pabfs
#alluxio.master.mount.table.root.option.alluxio.underfs.version=2.7
alluxio.underfs.hdfs.configuration=/etc/hadoop/conf/core-site.xml:/etc/hadoop/conf/hdfs-site.xml
alluxio.hadoop.kerberos.keytab.login.autorenewal=true
alluxio.master.keytab.file=/etc/security/keytab/hdfs.keytab
alluxio.master.principal=
alluxio.worker.keytab.file=/etc/security/keytab/hdfs.keytab
alluxio.worker.principal=
# Security properties
# alluxio.security.authorization.permission.enabled=true
# alluxio.security.authentication.type=SIMPLE
# Worker properties
alluxio.worker.ramdisk.size=20GB
alluxio.worker.tieredstore.levels=1
alluxio.worker.tieredstore.level0.alias=MEM
alluxio.worker.tieredstore.level0.dirs.path=/mnt/ramdisk
alluxio.worker.tieredstore.level0.dirs.mediumtype=MEM
alluxio.security.authentication.type=SIMPLE
#alluxio.security.login.username=hadoop
alluxio.master.security.impersonation.hdfs.users=*
alluxio.master.security.impersonation.hdfs.groups=*
alluxio.master.security.impersonation.hadoop.users=*
alluxio.master.security.impersonation.hadoop.groups=*
alluxio.master.security.impersonation.hive.users=*
alluxio.master.security.impersonation.hive.groups=*
alluxio.master.security.impersonation.presto.users=*
alluxio.master.security.impersonation.presto.groups=*
alluxio.security.login.impersonation.username=_HDFS_USER_
# User properties
alluxio.user.file.passive.cache.enabled=false
alluxio.user.file.readtype.default=CACHE
alluxio.user.file.writetype.default=CACHE_THROUGH
alluxio.user.short.circuit.preferred=true
alluxio.user.ufs.block.read.location.policy=alluxio.client.block.policy.DeterministicHashPolicy
alluxio.user.ufs.block.read.location.policy.deterministic.hash.shards=3
1.2 配置alluxio-env.sh
ALLUXIO_LOGS_DIR=/var/log/alluxio
ALLUXIO_USER_LOGS_DIR=/var/log/alluxio
ALLUXIO_WORKER_JAVA_OPTS=" -Dsun.security.krb5.debug=true"
ALLUXIO_WORKER_ATTACH_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=7070"
1.3 配置alluxio-locality.sh
#!/usr/bin/env bash
echo "node=$(hostname --fqdn),rack=/rack1"
1.4 分发配置
# 将conf/目录复制到conf/workers文件中指定的所有worker节点
./bin/alluxio copyDir conf/
2.格式化
./bin/alluxio formatMaster
3.启动
# 第一次需要mount
./bin/alluxio-start.sh all SudoMount
#启动/停止Alluxio
./bin/alluxio-stop.sh all
./bin/alluxio-stop.sh masters
./bin/alluxio-stop.sh master
./bin/alluxio-start.sh all
./bin/alluxio-start.sh masters
./bin/alluxio-start.sh master
#添加/删除Worker节点
./bin/alluxio-start.sh worker SudoMount # 启动本地 worker
./bin/alluxio-stop.sh worker
3.1 更新master侧配置
- 更新所有master节点master配置
- 重新启动leading master
./bin/alluxio-stop.sh master
./bin/alluxio-start.sh master
- 等待先前的leading master成功作为standby master完成启动
- 更新剩余的standby masters
3.2 更新worker侧配置
- 只停止本地worker,更新此worker上的配置
- 重新启动此worker
4.验证
./bin/alluxio fs masterInfo
./bin/alluxio runTests
5.客户端访问集群
5.1 或者alluxio-site.properties配置参数或者Java Option
# 针对嵌入式日志方法的HA
# core-site.xml
alluxio.master.rpc.addresses=master_hostname_1:19998,master_hostname_2:19998,master_hostname_3:19998
# spark-defaults.conf
spark.executor.extraJavaOptions/spark.driver.extraJavaOptions
-Dalluxio.master.rpc.addresses=master_hostname_1:19998,master_hostname_2:19998,master_hostname_3:19998
# 针对ZooKeeper方法的HA
alluxio.zookeeper.enabled=true
alluxio.zookeeper.address=cdd07.bigdata.qa.pab.com.cn:2181,cdd08.bigdata.qa.pab.com.cn:2181,cdd09.bigdata.qa.pab.com.cn:2181
5.2 或者Path中使用URL Authority
alluxio://master_hostname_1:19998,master_hostname_2:19998,master_hostname_3:19998/path
alluxio://zk@zkHost1:2181,zkHost2:2181,zkHost3:2181/path
5.3 或者core-site.xml使用逻辑域名
<property>
<name>fs.alluxio.impl</name>
<value>alluxio.hadoop.FileSystem</value>
<description>The Alluxio FileSystem (Hadoop 1.x and 2.x)</description>
</property>
<property>
<name>fs.AbstractFileSystem.alluxio.impl</name>
<value>alluxio.hadoop.AlluxioFileSystem</value>
<description>The Alluxio AbstractFileSystem (Hadoop 2.x)</description>
</property>
<!--property>
<name>alluxio.zookeeper.enabled</name>
<value>true</value>
</property>
<property>
<name>alluxio.zookeeper.address</name>
<value>cdd07.bigdata.qa.pab.com.cn:2181,cdd08.bigdata.qa.pab.com.cn:2181,cdd09.bigdata.qa.pab.com.cn:2181</value>
</property-->
<property>
<name>alluxio.master.zookeeper.nameservices.pab-alluxio</name>
<value>node1,node2,node3</value>
</property>
<property>
<name>alluxio.master.zookeeper.address.pab-alluxio.node1</name>
<value>cdd07.bigdata.qa.pab.com.cn:2181</value>
</property>
<property>
<name>alluxio.master.zookeeper.address.pab-alluxio.node2</name>
<value>cdd08.bigdata.qa.pab.com.cn:2181</value>
</property>
<property>
<name>alluxio.master.zookeeper.address.pab-alluxio.node3</name>
<value>cdd09.bigdata.qa.pab.com.cn:2181</value>
</property>
6.验证功能
./bin/alluxio fs ls /user/hive/warehouse/tpcds_sf1000.db/item
./bin/alluxio fs ls -R -h /user/hive/warehouse/tpcds_sf1000.db/item