win10下hadoop学习

最新推荐文章于 2023-07-18 00:45:55 发布

dragondyt

最新推荐文章于 2023-07-18 00:45:55 发布

阅读量127

点赞数

分类专栏：大数据文章标签： hadoop

本文链接：https://blog.csdn.net/qq_35136937/article/details/116044424

版权

大数据专栏收录该内容

2 篇文章 0 订阅

订阅专栏

下载

下载hadoop

在这里插入图片描述

配置环境变量

在这里插入图片描述
path路径

配置文件

‪hadoop-3.2.2\etc\hadoop\core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <!-- Default file system URI used by Hadoop clients. -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000/</value>
    </property>

    <!-- Set the static web user to root. -->
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>root</value>
    </property>

    <!-- Disable permission checking.
         NOTE(review): dfs.* keys are normally kept in hdfs-site.xml;
         confirm this setting is actually honored from core-site.xml. -->
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
</configuration>

hadoop-3.2.2\etc\hadoop\hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/d:/DevTools/hadoop-3.2.2/data/namenode</value>
        <description>NameNode directory for namespace and transaction logs storage.</description>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/d:/DevTools/hadoop-3.2.2/data/datanode</value>
        <description>DataNode directory</description>
    </property>
    <!-- This is a single-machine setup with one DataNode, so a replication
         factor above 1 can never be satisfied and every block would be
         reported as under-replicated. -->
    <property>
        <name>dfs.replication</name>
        <value>1</value>
        <description>Block replication factor (1 for a single-node cluster).</description>
    </property>
</configuration>

hadoop-3.2.2\etc\hadoop\mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <!-- Select YARN as the MapReduce execution framework. -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>

hadoop-3.2.2\etc\hadoop\yarn-site.xml

<?xml version="1.0"?>
<configuration>
    <!-- Enable the MapReduce shuffle as a NodeManager auxiliary service. -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <!-- Implementation class backing the mapreduce_shuffle service. -->
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>

    <!-- Host name of the ResourceManager. -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>localhost</value>
    </property>
</configuration>

缺失文件

在这里插入图片描述
github下载

遇到的问题

在这里插入图片描述

解决方案:安装DirectX修复工具增强版,修复缺失的系统组件;然后将hadoop.dll放到C:\Windows\System32目录下

在这里插入图片描述

ps: 本来打算放弃的,win下的坑太多了,一顿瞎搞后终于成功了,还有点小问题

在这里插入图片描述

节点未格式化

执行

hdfs namenode  -format

输入Y
在这里插入图片描述

启动

执行命令或双击hadoop-3.2.2\sbin下的start-all.cmd

./start-all.cmd

访问8088

在这里插入图片描述

访问9000(这是HDFS的IPC端口,不是Web界面,用浏览器访问会返回如下提示)

It looks like you are making an HTTP request to a Hadoop IPC port. This is not the correct port for the web interface on this daemon.

访问9870(Hadoop 3.x中NameNode Web界面的默认端口)

在这里插入图片描述

关闭

执行命令或双击hadoop-3.2.2\sbin下的stop-all.cmd

./stop-all.cmd

开发时修改权限

core-site.xml

<!-- Set the static web user to root. -->
<property>
    <name>hadoop.http.staticuser.user</name>
    <value>root</value>
</property>

<!-- Disable permission checking.
     NOTE(review): dfs.* keys are normally kept in hdfs-site.xml;
     confirm this setting is actually honored from core-site.xml. -->
<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>

测试

环境准备

使用图形界面添加稍后要用到的目录(例如 /wc/input)
上传测试文件word,内容为

my name is abc, I can read abc.
this is a demo for mapreduce
I'm learning hadoop
hadoop and mapreduce

在这里插入图片描述

代码编写

pom.xml

...
        <!-- Hadoop dependencies. The version matches the Hadoop 3.2.2
             installation configured earlier, so the client libraries and
             the cluster are from the same release. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.2.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.2.2</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>3.2.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.2.2</version>
        </dependency>
...

package com.hadoop.demo.mapreduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver that configures and submits the word-count MapReduce job.
 *
 * <p>Usage: {@code MyJob [inputPath [outputPath]]}. When an argument is
 * omitted the corresponding default HDFS location is used, so running the
 * class without arguments behaves as before.
 *
 * <p>Note: Hadoop refuses to start a job whose output directory already
 * exists, so the output directory must be removed (or a different one
 * passed) before re-running.
 */
public class MyJob {
    /** Input directory used when no first argument is supplied. */
    private static final String DEFAULT_INPUT = "hdfs://localhost:9000/wc/input";
    /** Output directory used when no second argument is supplied. */
    private static final String DEFAULT_OUTPUT = "hdfs://localhost:9000/wc/output";

    /**
     * Builds the job, waits for it to finish and exits with status 0 on
     * success or 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}
     * @throws IOException if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException if interrupted while waiting for completion
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        // 1. Create the configuration object.
        Configuration cfg = new Configuration();
        // 2. Create the job from that configuration.
        Job job = Job.getInstance(cfg);
        // 3. Register the jar, mapper and reducer classes and their output types.
        job.setJarByClass(MyJob.class);
        job.setMapperClass(MyMap.class);
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // 4. Where the job reads its input from.
        FileInputFormat.addInputPath(job, new Path(inputPath));
        // 5. Where the job writes its results to.
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // 6. Submit and block until the job completes (verbose progress).
        boolean succeeded = job.waitForCompletion(true);
        // 7. Propagate success or failure through the process exit code.
        System.exit(succeeded ? 0 : 1);
    }
}

package com.hadoop.demo.mapreduce;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Word-count mapper: emits {@code (word, 1)} for every whitespace-separated
 * token of each input line.
 */
public class MyMap extends Mapper<LongWritable,Text,Text,IntWritable> {
    /** Constant count of 1 written for every word occurrence. */
    private final IntWritable count = new IntWritable(1);
    /** Reused output key, overwritten for each word. */
    private final Text w = new Text();

    /**
     * Splits one input line into words and writes each with a count of 1.
     *
     * @param key     input key supplied by the framework (not used)
     * @param value   the text of the current line
     * @param context sink for the emitted {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Trim first so that leading whitespace cannot yield an empty first token.
        String line = value.toString().trim();
        // A blank line contains no words; splitting it would emit "" as a word.
        if (line.isEmpty()) {
            return;
        }
        // Split on runs of whitespace so repeated spaces and tabs do not
        // produce empty tokens.
        for (String word : line.split("\\s+")) {
            w.set(word);
            context.write(w, count);
        }
    }
}

package com.hadoop.demo.mapreduce;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Word-count reducer: adds up all counts received for a word and emits
 * the word together with that total.
 */
public class MyReduce extends Reducer<Text,IntWritable,Text,IntWritable> {
    /** Reused output value holding the total for the current word. */
    private IntWritable total = new IntWritable();

    /**
     * Sums the values for one key and writes {@code (word, sum)}.
     *
     * @param word    the key being reduced
     * @param values  the counts emitted for this key
     * @param context sink for the resulting pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text word, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Accumulate the partial counts as a plain int.
        int acc = 0;
        for (IntWritable partial : values) {
            acc = acc + partial.get();
        }
        // Wrap the total in the reusable writable and emit it.
        total.set(acc);
        context.write(word, total);
    }
}