Running Hadoop MapReduce on Windows

1. Add the Maven dependencies

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-yarn-common</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-yarn-client</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
    <version>2.7.3</version>
</dependency>
<dependency>
    <groupId>org.anarres.lzo</groupId>
    <artifactId>lzo-hadoop</artifactId>
    <version>1.0.0</version>
    <scope>compile</scope>
</dependency>
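
These coordinates belong in the <dependencies> section of the project's pom.xml. A minimal skeleton for context (the project's own groupId/artifactId/version below are placeholders, not from the original post):

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- placeholder coordinates for the example project -->
    <groupId>com.example</groupId>
    <artifactId>wordcount</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!-- the five Hadoop/LZO dependencies listed above go here -->
    </dependencies>
</project>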

2. Create the three classes: the Mapper, the Reducer, and the App driver, as follows.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // sum the 1s emitted by the mappers for this word
        int count = 0;
        for (IntWritable iw : values) {
            count = count + iw.get();
        }
        // log which thread handled this key (for observation only)
        String tno = Thread.currentThread().getName();
        System.out.println(tno + " : WCReducer :" + key.toString() + "=" + count);
        context.write(key, new IntWritable(count));
    }
}
 
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        Text keyOut = new Text();
        IntWritable valueOut = new IntWritable();
        // split the line on spaces and emit (word, 1) for each token
        String[] arr = value.toString().split(" ");
        for (String s : arr) {
            keyOut.set(s);
            valueOut.set(1);
            context.write(keyOut, valueOut);
        }
    }
}

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
public class WCApp {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        Job job = Job.getInstance(conf);
        // configure the job
        job.setJobName("WCApp");                        // job name
        job.setJarByClass(WCApp.class);                 // class used to locate the jar
        job.setInputFormatClass(TextInputFormat.class); // input format
        // output format class
        //job.setOutputFormatClass(SequenceFileOutputFormat.class);
        // add the input path
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // set the output path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // maximum split size
        //FileInputFormat.setMaxInputSplitSize(job,13);
        // minimum split size
        //FileInputFormat.setMinInputSplitSize(job,1L);
        // custom partitioner (a sketch of MyPartitioner is shown after this class)
        job.setPartitionerClass(MyPartitioner.class);
        // combiner class
        job.setCombinerClass(WCReducer.class);
        job.setMapperClass(WCMapper.class);             // mapper class
        job.setReducerClass(WCReducer.class);           // reducer class
        job.setNumReduceTasks(3);                       // number of reducers
        job.setMapOutputKeyClass(Text.class);           // map output key type
        job.setMapOutputValueClass(IntWritable.class);  // map output value type
        job.setOutputKeyClass(Text.class);              // final output key type
        job.setOutputValueClass(IntWritable.class);     // final output value type
        job.waitForCompletion(true);
    }
}
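
The driver above calls job.setPartitionerClass(MyPartitioner.class), but that class is not shown in the post. A minimal sketch of what such a partitioner could look like, simply hash-partitioning on the word (equivalent to Hadoop's default HashPartitioner), is:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Sketch of the custom partitioner referenced by WCApp; the original class is not shown.
// This version hash-partitions on the word, matching Hadoop's default behavior.
public class MyPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        // numPartitions is 3 here because the driver calls job.setNumReduceTasks(3)
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}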

3. Install Hadoop: extract the Hadoop archive on Windows and configure the environment variables.
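
For example (the install path below is only an illustration, not from the original post), the Windows environment variables would look roughly like:

HADOOP_HOME=C:\hadoop-2.7.3          (adjust to wherever the archive was extracted)
PATH=%PATH%;%HADOOP_HOME%\bin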

4. Errors when running
(null) entry in command string: null chmod 0700
There are two fixes commonly suggested online. The first: specify hadoop.home.dir in the program:

System.setProperty("hadoop.home.dir","c:/Users/Administrator/Desktop/hadoop-2.7.7/bin" );

Completely useless (possibly because hadoop.home.dir is expected to point at the Hadoop root directory rather than its bin subdirectory).
The second fix, following https://ask.hellobi.com/blog/jack/5063.
Step one: download winutils.exe and libwinutils.lib and copy them into the %HADOOP_HOME%\bin directory.
Still useless.
Step two: download hadoop.dll and copy it into c:\windows\system32.
The error finally changed, to:
file permissions : java.io.IOException: (null) entry in command string: null ls -F

Searching again suggested that the input path must be a concrete file path. Open Run -> Edit Configurations and set the program arguments, as in the screenshot below.
(screenshot: the Run > Edit Configurations dialog with the program arguments filled in)
Important: the first argument is the input and must point to a specific file; the second is the output path, and that out folder must not already exist (delete it first if it does).
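
For example (these paths are illustrative only, not from the original post), the Program arguments field could contain an existing input file followed by a not-yet-existing output directory; since fs.defaultFS is set to file:///, plain local Windows paths work:

d:/mr/hello.txt d:/mr/out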
