1. Prerequisites
1.1 Linux basics
- Features of the Linux kernel; installing Linux in VMware
- CentOS: known for stability
- Commonly used directories: /bin, /usr, /etc
- Xshell: use Xshell to operate CentOS remotely
- Text editors: vi/vim
1.2 Common commands
- Help: man
- Directories: mkdir, rmdir, mv, ls, rm -rf, cd
- Files: touch/vi, cat, cp, rm, more, grep
- Search: which, whereis, find
- Date/time: date, date -s
- User and group management: useradd…, groupadd…
- Processes: ps -ef; kill -9 <pid>; pkill -f <process pattern>
- Network: netstat -aux
- Disk: df
- Compression and extraction: zip, unzip, tar
  - tar -zcvf to compress
  - tar -zxvf to extract
- Software packages: yum
  - yum list
  - yum install
  - yum remove
  - rpm -ivh / -evh: awareness only
- Upload/download (lrzsz): rz, sz
- Scheduled tasks: crontab -e
  - fields: minute, hour, day of month, month, day of week (e.g. 0 3 * * 1 runs at 03:00 every Monday)
  - crontab -l: list entries
  - crontab -r: remove all entries
1.3 Shell scripts
- Variables: assign with x=value, reference with $x
- Arithmetic: $[3+6] (equivalently $((3+6)))
- Conditionals: if [ condition ]; then commands; fi
- Loops:
for ((i=0; i<10; i++)); do commands; done
for x in list; do commands; done
while [ condition ]; do commands; done
- Functions: define with function fun(){ commands; }, then call it with fun
2. Hadoop setup on Windows
- Extract the Hadoop archive
- Set HADOOP_HOME to the extracted directory
- Add the bin and sbin directories to PATH
- Test:
hadoop version
3. Building a Hadoop cluster on Linux
Cluster members:

| Host | HDFS | YARN |
| --- | --- | --- |
| master | NameNode, SecondaryNameNode | ResourceManager |
| slave1 | DataNode | NodeManager |
| slave2 | DataNode | NodeManager |
3.1 Installing JDK 8 and Hadoop 3.2.1
- Upload the archives and extract them (under /usr)
- Set environment variables (/etc/profile):
export JAVA_HOME=/usr/jdk8
export HADOOP_HOME=/usr/hadoop321
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
- Reload the profile:
. /etc/profile
- Test:
hadoop version
3.2 HDFS configuration
- core-site.xml:
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://master:9000</value>
</property>
- hdfs-site.xml:
<property>
  <name>dfs.replication</name>
  <value>2</value>
</property>
<property>
  <name>dfs.http.address</name>
  <value>0.0.0.0:5700</value>
</property>
<property>
  <name>dfs.namenode.name.dir</name>
  <value>file:///root/hadoop/dfs/namenode</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>file:///root/hadoop/dfs/datanode</value>
</property>
<property>
  <name>dfs.webhdfs.enabled</name>
  <value>true</value>
</property>
- Format the NameNode:
hdfs namenode -format
- In start-dfs.sh and stop-dfs.sh, set the run-as users:
# set users
HDFS_NAMENODE_USER=root
HDFS_DATANODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
- In hadoop-env.sh:
export JAVA_HOME=/usr/jdk8
3.3 Cluster member configuration
- Bind hostnames to IPs (/etc/hosts):
192.168.85.129 master
192.168.85.130 slave1
192.168.85.131 slave2
- Configure the worker nodes (/usr/hadoop321/etc/hadoop/workers):
slave1
slave2
- Set the replication factor to the number of DataNodes (hdfs-site.xml):
<property>
  <name>dfs.replication</name>
  <value>2</value>
</property>
3.4 YARN configuration
- yarn-site.xml:
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname</name>
  <value>master</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.address</name>
  <value>master:8088</value>
</property>
<property>
  <name>yarn.application.classpath</name>
  <value>/usr/hadoop321/etc/hadoop:/usr/hadoop321/share/hadoop/common/lib/*:/usr/hadoop321/share/hadoop/common/*:/usr/hadoop321/share/hadoop/hdfs:/usr/hadoop321/share/hadoop/hdfs/lib/*:/usr/hadoop321/share/hadoop/hdfs/*:/usr/hadoop321/share/hadoop/mapreduce/lib/*:/usr/hadoop321/share/hadoop/mapreduce/*:/usr/hadoop321/share/hadoop/yarn:/usr/hadoop321/share/hadoop/yarn/lib/*:/usr/hadoop321/share/hadoop/yarn/*</value>
</property>
- mapred-site.xml:
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>
- In start-yarn.sh and stop-yarn.sh, set the run-as users:
YARN_RESOURCEMANAGER_USER=root
YARN_NODEMANAGER_USER=root
3.5 Cloning CentOS
- Change the hostname:
hostnamectl set-hostname <hostname>
- Delete the files under /tmp so the cloned DataNode shows up in the web UI (note: the firewall must be off):
systemctl stop firewalld (stop it now)
systemctl disable firewalld (do not start on boot)
3.6 Passwordless SSH from master to the slaves
- Generate a key pair in root's home directory:
ssh-keygen
- Append the public key to authorized_keys and copy it to each slave's .ssh directory:
cat id_rsa.pub >> authorized_keys
scp authorized_keys root@slave1:/root/.ssh
3.7 Starting the Hadoop cluster
- Start everything from master:
start-all.sh
- Verify the daemons on each node (master should show NameNode, SecondaryNameNode, and ResourceManager; the slaves should show DataNode and NodeManager):
jps
- Inspect the cluster's nodes:
hdfs dfsadmin -report
4. MapReduce examples
4.1 Word count (getting started)
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * English word count
 */
public class WordCounter {
    // mapper: split each line into words
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        public static Text text = new Text();
        public static IntWritable intWritable = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String v = value.toString();
            String[] words = v.split(" ");
            for (String word : words) {
                text.set(word);
                context.write(text, intWritable);
            }
        }
    }

    // reducer: sum the counts for each word
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            context.write(key, new IntWritable(count));
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            // job setup
            Job job = Job.getInstance(conf);
            job.setJobName("firstJob");
            job.setJarByClass(WordCounter.class);
            // set mapper and reducer
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            // set output key/value types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // set input and output directories
            FileInputFormat.setInputPaths(job, "data6");
            FileOutputFormat.setOutputPath(job, new Path("dTemp"));
            // run the job and close it
            job.waitForCompletion(true);
            job.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
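A standard refinement not in the original notes: because the reduce step here is a plain associative sum, the same MyReducer class can also be registered as a map-side combiner with job.setCombinerClass(MyReducer.class), which pre-aggregates counts before the shuffle and cuts network traffic.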
4.2 Chinese word-segmentation count (IK Analyzer)
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

/**
 * Chinese word count
 */
public class CNWordCounter {
    // mapper: segment Chinese text with the IK segmenter
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        public static Text text = new Text();
        public static IntWritable intWritable = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            byte[] bytes = value.toString().getBytes();
            ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
            InputStreamReader isReader = new InputStreamReader(bis);
            IKSegmenter ikSegmenter = new IKSegmenter(isReader, true); // true = smart segmentation mode
            Lexeme lexeme = null;
            while ((lexeme = ikSegmenter.next()) != null) {
                String word = lexeme.getLexemeText();
                text.set(word);
                context.write(text, intWritable);
            }
        }
    }

    // reducer: sum the counts, buffering results so they can be sorted at the end
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        public static Text text = new Text();
        public static List<Record> list = new ArrayList<Record>(); // Record: see the sketch after this listing

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            // context.write(key, new IntWritable(count));
            Record record = new Record(key.toString(), count);
            list.add(record);
        }

        // cleanup runs once after all reduce calls: sort by count, most frequent first, then emit
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            Collections.sort(list);
            Collections.reverse(list);
            for (Record record : list) {
                text.set(record.getWord());
                context.write(text, new IntWritable(record.getCount()));
            }
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);
            job.setJobName("secondJob");
            job.setJarByClass(CNWordCounter.class);
            // set mapper and reducer
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            // set output key/value types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // set input and output directories
            FileInputFormat.setInputPaths(job, "/test99/data2");
            FileOutputFormat.setOutputPath(job, new Path("/test99/out"));
            // run the job and close it
            job.waitForCompletion(true);
            job.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
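The Record helper class used by MyReducer is referenced but never defined in these notes. Below is a minimal sketch consistent with how it is used above (a constructor taking word and count, getters, and a natural ordering by count so that Collections.sort followed by Collections.reverse yields descending frequency); the original implementation may have differed:

// Hypothetical reconstruction of the Record helper referenced above
public class Record implements Comparable<Record> {
    private final String word;
    private final int count;

    public Record(String word, int count) {
        this.word = word;
        this.count = count;
    }

    public String getWord() { return word; }
    public int getCount() { return count; }

    // ascending by count, so sort + reverse gives the most frequent words first
    @Override
    public int compareTo(Record other) {
        return Integer.compare(this.count, other.count);
    }
}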
4.3 Data cleaning (deduplicate, drop empty, drop invalid)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Data cleaning: drop empty lines, deduplicate, drop invalid records
 */
public class DataClear {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);
            job.setJobName("clearJob");
            job.setJarByClass(DataClear.class);
            // mapper and reducer (sketched after this listing); the reducer registration
            // is an assumption, since the original notes break off below
            job.setMapperClass(RemoveReplyMapper.class);
            job.setReducerClass(DedupReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);
            // input and output directories; "clearOut" is a placeholder for the lost original output path
            FileInputFormat.setInputPaths(job, "data4");
            FileOutputFormat.setOutputPath(job, new Path("clearOut"));
            job.waitForCompletion(true);
            job.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
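The notes break off mid-listing: RemoveReplyMapper is referenced but never shown, and everything after the input path is lost. The following is a hedged sketch matching the section title (drop empty lines, drop malformed records, deduplicate); both classes would sit inside DataClear next to main, and the comma-based validity check is a placeholder since the real record format is not given:

// Hypothetical sketch; requires these additional imports: java.io.IOException,
// org.apache.hadoop.io.LongWritable, org.apache.hadoop.mapreduce.Mapper, org.apache.hadoop.mapreduce.Reducer
public static class RemoveReplyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            return; // drop empty lines
        }
        if (line.split(",").length < 2) {
            return; // placeholder validity check: drop records that do not parse
        }
        context.write(new Text(line), NullWritable.get());
    }
}

public static class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // identical lines arrive grouped under one key; writing the key once removes duplicates
        context.write(key, NullWritable.get());
    }
}

Emitting each clean line as the map output key makes the shuffle do the heavy lifting: duplicates collapse into a single reducer call, so the reducer only has to write each distinct line once.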