说明
可以是虚拟机也可以是云服务器,内存不大。只安装hadoop,hive,spark,用于学习。其他组件如果服务器允许也可以安装。
本机需要安装jdk
同时,将环境的路径也配置下
cd ~
# Install JDK 8 from the local RPM (installs under /usr/java)
rpm -ivh jdk-8u261-linux-x64.rpm
vi /etc/profile
# Append the lines below to /etc/profile.
# NOTE: HADOOP_HOME previously read /user/share/... — a typo for /usr/share/...,
# which is where the tarballs are actually extracted.
export JAVA_HOME=/usr/java/jdk1.8.0_261-amd64
export HADOOP_HOME=/usr/share/bigdata/hadoop-3.2.1
export HIVE_HOME=/usr/share/bigdata/hive-3.1.2
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin
# Reload so the new variables take effect in the current shell
source /etc/profile
安装的位置为:/usr/share/bigdata(注意是 /usr 而非 /user)
资源上传到了~
安装Hadoop
cd ~
# -p: succeed even if the directory already exists
mkdir -p /usr/share/bigdata
tar -zxf hadoop-3.2.1.tar.gz -C /usr/share/bigdata
cd /usr/share/bigdata/hadoop-3.2.1
配置
vi etc/hadoop/hadoop-env.sh
# Set JAVA_HOME in hadoop-env.sh; export it (the template's own commented
# example uses `export JAVA_HOME=`) so child processes inherit it.
export JAVA_HOME=/usr/java/jdk1.8.0_261-amd64
创建hdfs用户
useradd hdfs
passwd hdfs
# Give the hdfs user AND its group ownership of the whole install tree
chown -R hdfs:hdfs /usr/share/bigdata
# Use a login shell (`su -`) so /etc/profile (JAVA_HOME/HADOOP_HOME/PATH) is loaded
su - hdfs
core-site.xml修改数据保存位置,注意:hadoop.proxyuser.&lt;用户名&gt;.hosts 与 hadoop.proxyuser.&lt;用户名&gt;.groups 中的用户名必须是启动 HiveServer2 的用户(本文为 hdfs)
<!-- Default filesystem: the NameNode RPC endpoint -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<!-- Base dir for HDFS data; the default under /tmp is wiped on reboot -->
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/share/bigdata/hadoop-3.2.1/data/tmp</value>
</property>
<!-- Allow the 'hdfs' service user to impersonate requests from any host
     (needed because HiveServer2 runs as hdfs but serves other users) -->
<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>
<!-- ...and to impersonate users belonging to any group -->
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>
hdfs-site.xml
<!-- Single-node cluster: keep only one replica per block -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- Enable the WebHDFS REST API -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
yarn-site.xml
<!-- Enable the shuffle auxiliary service so MapReduce jobs can run on YARN -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
mapred-site.xml
<!-- Run MapReduce jobs on YARN instead of the local runner -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Hadoop 3.x: the AM and map/reduce tasks each need HADOOP_MAPRED_HOME
     in their environment, otherwise jobs fail to find the MR framework -->
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/usr/share/bigdata/hadoop-3.2.1</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/usr/share/bigdata/hadoop-3.2.1</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/usr/share/bigdata/hadoop-3.2.1</value>
</property>
其他均不需要修改,即可使用。
# First run only: format the NameNode metadata directory
hdfs namenode -format
# Start the HDFS daemons (NameNode, then DataNode)
hdfs --daemon start namenode
hdfs --daemon start datanode
# Start the YARN daemons
yarn --daemon start resourcemanager
yarn --daemon start nodemanager
在Hadoop上执行MR程序
# Smoke test: estimate pi with the bundled example job (10 maps, 20 samples each)
cd /usr/share/bigdata/hadoop-3.2.1/share/hadoop/mapreduce/
hadoop jar hadoop-mapreduce-examples-3.2.1.jar pi 10 20
安装Hive
需要先安装mysql,并且创建一个名为hive的数据库。
cd ~
# -p: /usr/share/bigdata already exists from the Hadoop install; plain mkdir would fail
mkdir -p /usr/share/bigdata
tar -zxf apache-hive-3.1.2-bin.tar.gz -C /usr/share/bigdata
mv /usr/share/bigdata/apache-hive-3.1.2-bin /usr/share/bigdata/hive-3.1.2
cd /usr/share/bigdata/hive-3.1.2
mv conf/hive-env.sh.template conf/hive-env.sh
vi conf/hive-env.sh
# Append to hive-env.sh:
HADOOP_HOME=/usr/share/bigdata/hadoop-3.2.1
export HIVE_CONF_DIR=/usr/share/bigdata/hive-3.1.2/conf
export HIVE_AUX_JARS_PATH=/usr/share/bigdata/hive-3.1.2/lib
cd conf
# Activate the shipped logging configuration templates
mv hive-log4j2.properties.template hive-log4j2.properties
mv beeline-log4j2.properties.template beeline-log4j2.properties
mv hive-exec-log4j2.properties.template hive-exec-log4j2.properties
配置元数据
# Create an empty hive-site.xml; fill it with the configuration content below
touch hive-site.xml
配置内容
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Use an embedded (local) metastore client -->
<property>
<name>hive.metastore.local</name>
<value>true</value>
</property>
<!-- JDBC URL of the MySQL metastore database.
     NOTE: literal '&' is illegal inside XML and breaks parsing of hive-site.xml;
     it must be written as '&amp;'. -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://127.0.0.1:3306/hive?characterEncoding=UTF-8&amp;serverTimezone=GMT%2B8&amp;useSSL=false</value>
</property>
<!-- MySQL Connector/J 8.x driver class -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>
<!-- Credentials for the metastore database -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>docker</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>docker@12345</value>
</property>
<!-- CLI conveniences: show column headers and the current database -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
</configuration>
修改jar
# Hive 3.1.2 ships guava 19, which conflicts with Hadoop 3.2.1's guava 27;
# replace Hive's copy with Hadoop's.
cp /usr/share/bigdata/hadoop-3.2.1/share/hadoop/common/lib/guava-27.0-jre.jar /usr/share/bigdata/hive-3.1.2/lib
# A single jar is a plain file: -f is enough, -r is unnecessary
rm -f /usr/share/bigdata/hive-3.1.2/lib/guava-19.0.jar
# Put the MySQL JDBC driver jar into /usr/share/bigdata/hive-3.1.2/lib first
schematool -initSchema -dbType mysql
# Run HiveServer2 in the background, discarding its console output
nohup hiveserver2 1>/dev/null 2>&1 &
hive
# quit the Hive CLI with: exit;
beeline -u jdbc:hive2://localhost:10000 -n root
错误
User: hdfs is not allowed to impersonate root —— 出现该错误说明 core-site.xml 中缺少 hadoop.proxyuser.hdfs.hosts / hadoop.proxyuser.hdfs.groups 配置;按上文补齐后需重启 NameNode 使其生效。
安装Spark
cd ~
# -p: the directory already exists from the earlier installs
mkdir -p /usr/share/bigdata
tar -zxf spark-3.0.1-bin-hadoop3.2.tgz -C /usr/share/bigdata
mv /usr/share/bigdata/spark-3.0.1-bin-hadoop3.2 /usr/share/bigdata/spark-3.0.1
cd /usr/share/bigdata/spark-3.0.1