Writing MapReduce Programs on Windows

Background: A hadoop-2.8.5 cluster is running on CentOS 7.5. The goal is to write and run MapReduce (MR) programs from Windows.

Setting up the Hadoop environment on Windows

    1. Download the same Hadoop version as the one running on the Linux cluster.
    2. Configure the Hadoop environment variables:
      1. HADOOP_HOME: D:\hadoop-2.8.5
      2. HADOOP_BIN_PATH: %HADOOP_HOME%\bin
      3. HADOOP_PREFIX: %HADOOP_HOME%
      4. Append ;%HADOOP_HOME%\bin;%HADOOP_HOME%\sbin; to Path
    3. Unzip winutils-master.zip and copy its files into the bin directory under HADOOP_HOME.
    4. Check the local Hadoop version by running hadoop version in a command prompt.

Writing the Mapper and Reducer

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * WCMapper
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        Text keyOut = new Text();
        IntWritable valueOut = new IntWritable();
        String[] arr = value.toString().split(" ");
        for (String s : arr) {
            keyOut.set(s);
            valueOut.set(1);
            context.write(keyOut, valueOut);
        }
    }
}

 

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * WCReducer
 */
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * reduce
     */
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int count = 0;
        for (IntWritable iw : values) {
            count = count + iw.get();
        }
        String tno = Thread.currentThread().getName();
        System.out.println(tno + " : WCReducer :" + key.toString() + "=" + count);
        context.write(key, new IntWritable(count));
    }
}
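Because this reducer just sums values, and summing is associative and commutative, the same class can also safely be reused as the combiner when the job is configured below.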

Project pom.xml dependencies

<dependencies>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.8.5</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>2.8.5</version>
  </dependency>
  <dependency>
    <groupId>commons-cli</groupId>
    <artifactId>commons-cli</artifactId>
    <version>1.2</version>
  </dependency>
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.11</version>
    <scope>test</scope>
  </dependency>
</dependencies>

Add the jars from the share directory of the local Hadoop installation (HADOOP_HOME\share\hadoop) to the project.

 


Writing the job class

    1. Using the local (Windows) file system

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

/**
 * WCApp
 */
public class WCApp {

    public static void main(String[] args) throws Exception {
        // This runs against the local Windows file system; the Linux cluster is not used.
        Configuration conf = new Configuration();

        conf.set("fs.defaultFS", "file:///");

        Job job = Job.getInstance(conf);

        // Set the job properties
        job.setJobName("WCApp");                        // job name
        job.setJarByClass(WCApp.class);                 // class used to locate the jar
        job.setInputFormatClass(TextInputFormat.class); // input format

        // Output format class
        //job.setOutputFormatClass(SequenceFileOutputFormat.class);

        // Add the input path
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Set the output path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Maximum split size
        //FileInputFormat.setMaxInputSplitSize(job, 13);
        // Minimum split size
        //FileInputFormat.setMinInputSplitSize(job, 1L);

        // Partitioner class
        job.setPartitionerClass(MyPartitioner.class);   // custom partitioner (see sketch below)

        // Combiner class
        job.setCombinerClass(WCReducer.class);          // combiner

        job.setMapperClass(WCMapper.class);             // mapper class
        job.setReducerClass(WCReducer.class);           // reducer class

        job.setNumReduceTasks(3);                       // number of reduce tasks

        job.setMapOutputKeyClass(Text.class);           // map output key type
        job.setMapOutputValueClass(IntWritable.class);  // map output value type

        job.setOutputKeyClass(Text.class);              // final output key type
        job.setOutputValueClass(IntWritable.class);     // final output value type

        job.waitForCompletion(true);
    }
}
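The job above references a MyPartitioner class that is not listed in the post. A minimal sketch of what such a partitioner could look like (the hash-based scheme here is an assumption, not the original code):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Hypothetical MyPartitioner sketch: spreads words across the reduce tasks by hash.
 * The original post does not show this class, so this is only an illustration.
 */
public class MyPartitioner extends Partitioner<Text, IntWritable> {

    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        // Mask the sign bit so the modulo result is never negative.
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}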

Note: args[0] and args[1] here are run parameters, passed as Program Arguments in the IDE run configuration.

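For the local run, the two arguments are simply an input file and an output directory on the Windows disk, for example (hypothetical paths) d:/mr/word.txt and d:/mr/out. The output directory must not exist yet, otherwise FileOutputFormat rejects it.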

    2. Using HDFS on the Linux cluster
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RunJob {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf, "word count");
            job.setJarByClass(RunJob.class);
            job.setMapperClass(MyMap.class);
            job.setReducerClass(MyReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Work around the "No job jar file set. User classes may not be found. See Job or Job#setJar(String)" warning
            job.setJar("E:\\bigData\\workspace\\testMR\\out\\artifacts\\MyMR\\MyMR.jar");
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileSystem fs = FileSystem.get(conf);
            Path op1 = new Path(args[1]);
            if (fs.exists(op1)) {
                fs.delete(op1, true);
                System.out.println("Output path already exists; deleted it.");
            }
            FileOutputFormat.setOutputPath(job, op1);
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

Note: the cluster's core-site.xml must be copied into the project's resources so the client knows where the HDFS NameNode is.

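If only the NameNode address is needed, an alternative to copying the whole file is to set fs.defaultFS directly in code before creating the Job, just as the local example does with file:///. The address below is a made-up example; use the fs.defaultFS value from the cluster's core-site.xml:

Configuration conf = new Configuration();
// Hypothetical NameNode address -- replace with the fs.defaultFS value from the cluster's core-site.xml.
conf.set("fs.defaultFS", "hdfs://192.168.1.100:9000");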

Then change the values configured for args[0] and args[1] (the input and output paths) accordingly.


Check the result in the HDFS web UI.


    3. A possible error: Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z


 

Fix: download the Hadoop source of the same version, take NativeIO.java from hadoop-common-project\hadoop-common\src\main\java\org\apache\hadoop\io\nativeio, and place it under the matching package (org.apache.hadoop.io.nativeio) in your own project so that it shadows the class in the jar.


Then modify the return value of the access method, as shown in the sketch below.

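The change usually applied (a sketch of the common workaround, not necessarily the exact edit shown in the original screenshots) is to make the Windows access check always succeed instead of calling the native method:

// In the copied NativeIO.java, inside the NativeIO.Windows class:
public static boolean access(String path, AccessRight desiredAccess)
        throws IOException {
    // The original body calls the native access0(path, desiredAccess.accessRight()),
    // which fails without a matching hadoop.dll; returning true skips the native check.
    return true;
}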

 

The two approaches differ mainly in the input source and the output location.

 

 

Reposted from: https://my.oschina.net/905042249/blog/2874917
