【大数据安装】安装Java&&Hadoop

前置知识:

如何使用mac sftp进行传输

如何使用https://www.jianshu.com/p/f875343cb664

1.安装java

参考文章:ubuntu20.04安装java1.8环境_ubuntu 20.04 java1.8.0_381_格式化,何容易的博客-CSDN博客

sudo tar -zxvf  jdk-8u391-linux-x64.tar.gz
sudo vi /etc/profile

export JAVA_HOME=/home/ubuntu/software/java/jdk1.8.0_391
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH



source /etc/profile

2.安装Hadoop

参考文章

(初学者强烈推荐)Ubuntu 配置hadoop 超详细教程(全过程)_ubuntu安装hadoop_头发好多的博客-CSDN博客

vim etc/hadoop/core-site.xml

<property>
             <name>hadoop.tmp.dir</name>
             <value>file:/home/ubuntu/software/hadoop/hadoop-3.3.1/tmp</value>
             <description>Abase for other temporary directories.</description>
        </property>
        <property>
             <name>fs.defaultFS</name>
             <value>hdfs://localhost:9000</value>
</property>

完整代码:

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
             <name>hadoop.tmp.dir</name>
             <value>file:/home/ubuntu/software/hadoop/hadoop-3.3.1/tmp</value>
             <description>Abase for other temporary directories.</description>
        </property>
        <property>
             <name>fs.defaultFS</name>
             <value>hdfs://localhost:9000</value>
</property>
</configuration>

vim etc/hadoop/hdfs-site.xml

<property>
             <name>dfs.replication</name>
             <value>1</value>
        </property>
        <property>
             <name>dfs.namenode.name.dir</name>
             <value>file:/home/ubuntu/software/hadoop/hadoop-3.3.1/tmp/dfs/name</value>
        </property>
        <property>
             <name>dfs.datanode.data.dir</name>
             <value>file:/home/ubuntu/software/hadoop/hadoop-3.3.1/tmp/dfs/data</value>
        </property>

vim etc/hadoop/hadoop-env.sh

export JAVA_HOME="/home/ubuntu/software/java/jdk1.8.0_391" 

vim etc/hadoop/yarn-site.xml

<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<!--Resource Manager-->
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>VM-4-14-ubuntu</value><!--你的hostname的主机名-->
</property>

<!-- 选择调度器,默认容量 -->
<property>
	<description>The class to use as the resource scheduler.</description>
	<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>

<!-- ResourceManager处理调度器请求的线程数量,默认50;如果提交的任务数大于50,可以增加该值,但是不能超过3台 * 4线程 = 12线程(去除其他应用程序实际不能超过8) -->
<property>
	<description>Number of threads to handle scheduler interface.</description>
	<name>yarn.resourcemanager.scheduler.client.thread-count</name>
	<value>4</value>
</property>
<!-- 是否让yarn自动检测硬件进行配置,默认是false,如果该节点有很多其他应用程序,建议手动配置。如果该节点没有其他应用程序,可以采用自动 -->
<property>
	<description>Enable auto-detection of node capabilities such as
	memory and CPU.
	</description>
	<name>yarn.nodemanager.resource.detect-hardware-capabilities</name>
	<value>false</value>
</property>

<!-- 是否将虚拟核数当作CPU核数,默认是false,采用物理CPU核数 -->
<property>
	<description>Flag to determine if logical processors(such as
	hyperthreads) should be counted as cores. Only applicable on Linux
	when yarn.nodemanager.resource.cpu-vcores is set to -1 and
	yarn.nodemanager.resource.detect-hardware-capabilities is true.
	</description>
	<name>yarn.nodemanager.resource.count-logical-processors-as-cores</name>
	<value>false</value>
</property>
<!-- 虚拟核数和物理核数乘数,默认是1.0 -->
<property>
	<description>Multiplier to determine how to convert phyiscal cores to
	vcores. This value is used if yarn.nodemanager.resource.cpu-vcores
	is set to -1(which implies auto-calculate vcores) and
	yarn.nodemanager.resource.detect-hardware-capabilities is set to true. The	number of vcores will be calculated as	number of CPUs * multiplier.
	</description>
	<name>yarn.nodemanager.resource.pcores-vcores-multiplier</name>
	<value>1.0</value>
</property>

<!-- NodeManager使用内存数,默认8G,修改为4G内存 -->
<property>
	<description>Amount of physical memory, in MB, that can be allocated 
	for containers. If set to -1 and
	yarn.nodemanager.resource.detect-hardware-capabilities is true, it is
	automatically calculated(in case of Windows and Linux).
	In other cases, the default is 8192MB.
	</description>
	<name>yarn.nodemanager.resource.memory-mb</name>
	<value>1024</value>
</property>

<!-- nodemanager的CPU核数,不按照硬件环境自动设定时默认是8个,修改为4个 -->
<property>
	<description>Number of vcores that can be allocated
	for containers. This is used by the RM scheduler when allocating
	resources for containers. This is not used to limit the number of
	CPUs used by YARN containers. If it is set to -1 and
	yarn.nodemanager.resource.detect-hardware-capabilities is true, it is
	automatically determined from the hardware in case of Windows and Linux.
	In other cases, number of vcores is 8 by default.</description>
	<name>yarn.nodemanager.resource.cpu-vcores</name>
	<value>1</value>
</property>

<!-- 容器最小内存,默认1G -->
<property>
	<description>The minimum allocation for every container request at the RM	in MBs. Memory requests lower than this will be set to the value of this	property. Additionally, a node manager that is configured to have less memory	than this value will be shut down by the resource manager.
	</description>
	<name>yarn.scheduler.minimum-allocation-mb</name>
	<value>512</value>
</property>

<!-- 容器最小CPU核数,默认1个 -->
<property>
	<description>The minimum allocation for every container request at the RM	in terms of virtual CPU cores. Requests lower than this will be set to the	value of this property. Additionally, a node manager that is configured to	have fewer virtual cores than this value will be shut down by the resource	manager.
	</description>
	<name>yarn.scheduler.minimum-allocation-vcores</name>
	<value>1</value>
</property>

<!-- 容器最大CPU核数,默认4个,修改为2个 -->
<property>
	<description>The maximum allocation for every container request at the RM	in terms of virtual CPU cores. Requests higher than this will throw an
	InvalidResourceRequestException.</description>
	<name>yarn.scheduler.maximum-allocation-vcores</name>
	<value>1</value>
</property>


3.安装HBase

wget https://mirrors.tuna.tsinghua.edu.cn/apache/hbase/2.4.17/hbase-2.4.17-bin.tar.gz
 tar -zxvf hbase-2.4.17-bin.tar.gz 



export JAVA_HOME=/home/ubuntu/software/java/jdk1.8.0_391
export HBASE_MANAGES_ZK=true #PS需要修改 true需要hbase自己的zk,false依赖外部zk
export PATH=$PATH:$HBASE_HOME/bin
vim /etc/profile
 
#HBASE
export HBASE_HOME=/home/ubuntu/software/hbase/hbase-2.4.17
export PATH=${HBASE_HOME}/bin:$PATH
 
source /etc/profile

 sudo vim conf/hbase-site.xml

  <property>
    <name>hbase.cluster.distributed</name>
    <value>false</value>
  </property>
  <property>
    <name>hbase.tmp.dir</name>
    <value>/home/ubuntu/software/hbase/hbase-2.4.17/tmp</value>
  </property>
  <property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
  </property>
          <property>
                  <name>hbase.zookeeper.property.clientPort</name>
                  <value>2321</value>
          </property>

        <property>
                  <name>hbase.rootdir</name>
                  <value>hdfs://localhost:9000/hbase</value>
          </property>
          <property>
                  <name>hbase.master.info.port</name>
                  <value>60010</value>
          </property>
          <property>
                  <name>hbase.zookeeper.property.dataDir</name>
                  <value>/home/ubuntu/software/hbase/hbase-2.4.17/zookeeper_data</value>
          </property>

原有 hbase-site.xml

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->
<configuration>
  <!--
    The following properties are set for running HBase as a single process on a
    developer workstation. With this configuration, HBase is running in
    "stand-alone" mode and without a distributed file system. In this mode, and
    without further configuration, HBase and ZooKeeper data are stored on the
    local filesystem, in a path under the value configured for `hbase.tmp.dir`.
    This value is overridden from its default value of `/tmp` because many
    systems clean `/tmp` on a regular basis. Instead, it points to a path within
    this HBase installation directory.

    Running against the `LocalFileSystem`, as opposed to a distributed
    filesystem, runs the risk of data integrity issues and data loss. Normally
    HBase will refuse to run in such an environment. Setting
    `hbase.unsafe.stream.capability.enforce` to `false` overrides this behavior,
    permitting operation. This configuration is for the developer workstation
    only and __should not be used in production!__

    See also https://hbase.apache.org/book.html#standalone_dist
  -->
  <property>
    <name>hbase.cluster.distributed</name>
    <value>false</value>
  </property>
  <property>
    <name>hbase.tmp.dir</name>
    <value>/home/ubuntu/software/hbase/hbase-2.4.17/tmp</value>
  </property>
  <property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
  </property>
          <property>
                  <name>hbase.zookeeper.property.clientPort</name>
                  <value>2321</value>
          </property>

        <property>
                  <name>hbase.rootdir</name>
                  <value>hdfs://localhost:9000/hbase</value>
          </property>
          <property>
                  <name>hbase.master.info.port</name>
                  <value>60010</value>
          </property>
          <property>
                  <name>hbase.zookeeper.property.dataDir</name>
                  <value>/home/ubuntu/software/hbase/hbase-2.4.17/zookeeper_data</value>
          </property>
                  <property>
  <name>hbase.wal.provider</name>
  <value>filesystem</value>
</property>
</configuration>

 修改后:

PS:需要修改hbase-env.sh 

export HBASE_MANAGES_ZK=true 

  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>

  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>localhost:2181,localhost:2182,localhost:2183</value>
  </property>
<property>
    <name>hbase.master.info.port</name>
    <value>60010</value>
  </property>
 <property>
    <name>hbase.tmp.dir</name>
    <value>/home/ubuntu/software/hbase/hbase-2.4.17/tmp</value>
  </property>
 <property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
  </property>
<property>
                  <name>hbase.rootdir</name>
                  <value>hdfs://localhost:9000/hbase</value>
          </property>
                  <property>
  <name>hbase.wal.provider</name>
  <value>filesystem</value>
</property>

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值