版本
jdk-8u301-linux-x64.tar.gz
hadoop-3.2.2.tar.gz
apache-hive-3.1.2-bin.tar.gz
mysql-connector-java-5.1.49.jar
apache-flume-1.9.0-bin.tar.gz
sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
spark-3.1.2-bin-hadoop3.2.tgz
hadoop
解压
tar -xf hadoop-3.2.2.tar.gz -C /opt
mv /opt/hadoop-3.2.2 /opt/hadoop
tar -xf jdk-8u301-linux-x64.tar.gz -C /opt
环境变量
vi /etc/profile
#java
export JAVA_HOME=/opt/jdk1.8.0_301
export PATH=$PATH:$JAVA_HOME/bin
#hadoop3.0
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
#不然启动hadoop会报错
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
保存生效
source /etc/profile
验证
java -version
java version "1.8.0_301"
Java(TM) SE Runtime Environment (build 1.8.0_301-b09)
Java HotSpot(TM) 64-Bit Server VM (build 25.301-b09, mixed mode)
hadoop version
Hadoop 3.2.2
Source code repository Unknown -r 7a3bc90b05f257c8ace2f76d74264906f0f7a932
Compiled by hexiaoqiao on 2021-01-03T09:26Z
Compiled with protoc 2.5.0
From source with checksum 5a8f564f46624254b27f6a33126ff4
This command was run using /opt/hadoop/share/hadoop/common/hadoop-common-3.2.2.jar
vi hadoop-env.sh(注意不要改成JAVA_HOME=xxx)
#54
export JAVA_HOME=/opt/jdk1.8.0_301
vi core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/tmp</value>
</property>
vi mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
#写上绝对路径,不然执行mr会报错 找不到主类
<property>
<name>mapreduce.application.classpath</name>
<value>/opt/hadoop/share/hadoop/mapreduce/*, /opt/hadoop/share/hadoop/mapreduce/lib/*</value>
</property>
vi yarn-site.xml
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
vi workers
slave1
slave2
scp -r /opt/* slave1:/opt/
scp -r /opt/* slave2:/opt/
scp -r /etc/profile slave1:/etc/
scp -r /etc/profile slave2:/etc/
格式化
hdfs namenode -format
启动
start-all.sh
jps
运行pi案例
[root@master hadoop]# hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.2.jar pi 5 10
任务提交给yarn
运行结果
hive
#卸载自带mariadb
rpm -qa |grep mariadb
rpm -e --nodeps mariadb-libs-5.5.60-1.el7_5.x86_64
#解压mysql
tar -zxvf mysql-5.7.34-linux-glibc2.12-x86_64.tar.gz
mv mysql-5.7.34-linux-glibc2.12-x86_64 /opt/mysql
#写入环境变量
#mysql
export MYSQL_HOME=/opt/mysql
export PATH=$PATH:$MYSQL_HOME/bin
#在/etc/下创建my.cnf,内容如下
[mysqld]
basedir=/opt/mysql
datadir=/opt/mysql/data
socket=/tmp/mysql.sock
lower_case_table_names=1
user=root
character-set-server=utf8
[client]
default-character-set=utf8
[mysqld_safe]
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
#进入到mysql/bin目录下,初始化mysql
./mysqld --initialize-insecure --basedir=/opt/mysql --datadir=/opt/mysql/data --user=root
#复制mysql.server
cp /opt/mysql/support-files/mysql.server /etc/init.d/mysql
#启动与开机自启
chkconfig mysql on
service mysql start
mysql -uroot -p 回车
set password=password('Ahbvc!123');
grant all privileges on *.* to 'root'@'%' identified by '123456'; flush privileges;
安装hive
tar -xf apache-hive-3.1.2-bin.tar.gz -C /opt/
mv /opt/apache-hive-3.1.2-bin /opt/hive
环境变量
#hive3.0
export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin
生效
source /etc/profile
cp hive-env.sh.template hive-env.sh
vi hive-env.sh
48 HADOOP_HOME=/opt/hadoop
cp hive-default.xml.template hive-site.xml
vi hive-site.xml
583 <name>javax.jdo.option.ConnectionURL</name>
584 <value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false</value>
568 <name>javax.jdo.option.ConnectionPassword</name>
569 <value>Ahbvc!123</value>
798 <name>hive.metastore.schema.verification</name>
799 <value>false</value>
1102 <name>javax.jdo.option.ConnectionDriverName</name>
1103 <value>com.mysql.jdbc.Driver</value>
1127 <name>javax.jdo.option.ConnectionUserName</name>
1128 <value>root</value>
hive.exec.local.scratchdir、hive.downloaded.resources.dir、hive.querylog.location、hive.server2.logging.operation.log.location值换成目录/opt/hive/tmp
删除 3215 行 description 中 "for" 后面的非法字符(&#8;),否则解析 hive-site.xml 时会报错:
3215 Ensures commands with OVERWRITE (such as INSERT OVERWRITE) acquire Exclusive locks for transactional tables
mysql驱动放hive/lib目录下
cp /root/software/mysql-connector-java-5.1.49.jar /opt/hive/lib/
初始化
schematool -dbType mysql -initSchema
spark
spark on yarn模式
安装spark
tar -xf spark-3.1.2-bin-hadoop3.2.tgz -C /opt/
mv /opt/spark-3.1.2-bin-hadoop3.2 /opt/spark
环境变量
#spark
export SPARK_HOME=/opt/spark
export PATH=$PATH:$SPARK_HOME/bin
vi spark-env.sh
export JAVA_HOME=/opt/jdk1.8.0_301
YARN_CONF_DIR=/opt/hadoop/etc/hadoop
export SPARK_HISTORY_OPTS="
-Dspark.history.ui.port=18080
-Dspark.history.fs.logDirectory=hdfs://master:9000/directory
-Dspark.history.retainedApplications=30"
vi spark-defaults.conf
spark.eventLog.enabled true
spark.eventLog.dir hdfs://master:9000/directory
spark.yarn.historyServer.address=master:18080
spark.history.ui.port=18080
vi workers
slave1
slave2
vi $HADOOP_HOME/etc/hadoop/yarn-site.xml 在原有的配置上增加以下内容
#SPARK ON YARN
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://master:19888/jobhistory/logs</value>
</property>
yarn-site.xml分发子节点
spark整个文件分发到子节点(不发也行)
/opt/spark/sbin目录下启动历史服务器(HistoryServer)
./start-history-server.sh
官方案例
spark-submit --master yarn --class org.apache.spark.examples.SparkPi $SPARK_HOME/examples/jars/spark-examples_2.12-3.1.2.jar 10
sqoop
安装
tar -xf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz -C /opt/
mv /opt/sqoop-1.4.7.bin__hadoop-2.6.0 /opt/sqoop
环境变量
#sqoop
export SQOOP_HOME=/opt/sqoop
export PATH=$PATH:$SQOOP_HOME/bin
vi sqoop-env.sh
export HADOOP_COMMON_HOME=/opt/hadoop
export HADOOP_MAPRED_HOME=/opt/hadoop
export HIVE_HOME=/opt/hive
驱动
[root@master lib]# pwd
/opt/sqoop/lib
[root@master lib]# cp /root/software/mysql-connector-java-5.1.49.jar ./
连接数据库
sqoop list-databases --connect jdbc:mysql://127.0.0.1:3306/ --username root -P
information_schema
ahbvc
douban
hive
mysql
performance_schema
shtd_industry
shtd_store
sys
flume
安装
tar -xf apache-flume-1.9.0-bin.tar.gz -C /opt/
mv /opt/apache-flume-1.9.0-bin /opt/flume
环境变量
#flume
export FLUME_HOME=/opt/flume
export PATH=$PATH:$FLUME_HOME/bin
vi flume-env.sh
export JAVA_HOME=/opt/jdk1.8.0_301
本地日志采集到hdfs上
flume 安装目录中创建 xxx.conf
vi simple-hdfs-flume.conf
a1.sources=r1
a1.sinks=k1
a1.channels=c1
a1.sources.r1.type=spooldir
a1.sources.r1.spoolDir=/opt/hadoop/logs/
a1.sources.r1.fileHeader=true
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=hdfs://master:9000/flume
a1.sinks.k1.hdfs.rollSize=1048760
a1.sinks.k1.hdfs.rollCount=0
a1.sinks.k1.hdfs.rollInterval=900
a1.sinks.k1.hdfs.useLocalTimeStamp=true
a1.channels.c1.type=file
a1.channels.c1.capacity=1000
a1.channels.c1.transactionCapacity=100
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
启动命令(注意guava.jar包 hadoop和flume留一最高版本)
flume-ng agent --conf $FLUME_HOME/conf --conf-file simple-hdfs-flume.conf --name a1 -Dflume.root.logger=INFO,console