Location of the software packages and code:
Link: https://pan.baidu.com/s/1hmm2f-NkkHuHbhlnmo0iew  Password: vjwy
wordcount:
1: About the built-in Writable types in Hadoop
BooleanWritable: standard Boolean value
ByteWritable: single-byte value
DoubleWritable: double-precision floating-point value
FloatWritable: single-precision floating-point value
IntWritable: integer, comparable to Java's int
LongWritable: long integer
Text: text stored in UTF-8, comparable to Java's String
All of them are accessed through their get()/set() methods, used much like fields of an object (see the sketch below).
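A minimal sketch of how these wrapper types are typically read and written (the class name and values here are only illustrative):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class WritableDemo {
    public static void main(String[] args) {
        // Wrap a primitive int in an IntWritable, then read it back with get()
        IntWritable count = new IntWritable();
        count.set(42);
        System.out.println("count = " + count.get());

        // Text wraps a UTF-8 string; set(...) replaces the contents, toString() reads it back
        Text word = new Text();
        word.set("hello");
        System.out.println("word = " + word.toString());
    }
}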
3. Write the code: reduce
Hadoop code:
1. Running locally:
Code:
package com.itstar;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class wc_hadoop {

    public static class mymap extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split each input line on spaces and emit a (word, 1) pair for every word
            String[] words = value.toString().split(" ");
            for (String word : words) {
                context.write(new Text(word), one);
            }
        }
    }

    public static class myreduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum all the counts that arrived for this word
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        // Because of HDFS user authentication: either set the user name here, or set
        // dfs.permissions to false in hdfs-site.xml; otherwise the job fails with a permission error
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "wc_hadoop");
        Path inp = new Path(args[1]);
        // Delete the output directory if it already exists; this approach works for the local file system
        FileSystem hf = inp.getFileSystem(conf);
        // For a remote HDFS file system use the following instead
        // FileSystem hf = FileSystem.get(conf);
        if (hf.exists(inp)) {
            hf.delete(inp, true);
        }
        job.setJarByClass(wc_hadoop.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setMapperClass(mymap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(myreduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileOutputFormat.setOutputPath(job, inp);
        job.waitForCompletion(true);
    }
}
Running locally:
1. Download the Hadoop package, unzip it locally, and add it to the environment variables; if that does not take effect, restart the machine and try again.
Two files have to be added to the unzipped package: hadoop.dll and winutils.exe.
2. Add the corresponding jar packages as dependencies; they are inside the local unzipped package (compare with the locations shown in the figure).
3. Set the run arguments (write as many as you need) under Program arguments: the input file path and the output file path (an illustrative example follows).
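For example, the Program arguments box in the IDE run configuration might contain something like the following (both paths are hypothetical placeholders; the first is the input file read as args[0], the second is the output directory used as args[1]):

E:\data\wordcount\input.txt E:\data\wordcount\output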
Success:
2. Testing the map locally:
This requires one dependency:
<dependency>
    <groupId>org.apache.mrunit</groupId>
    <artifactId>mrunit</artifactId>
    <version>1.0.0</version>
    <classifier>hadoop2</classifier>
    <scope>test</scope>
</dependency>
Test method:
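The original screenshot of the test is not reproduced here; below is a minimal sketch of what such an MRUnit test could look like, assuming the mymap and myreduce classes above and JUnit 4 on the test classpath (the test class name and input line are illustrative):

package com.itstar;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Test;

import java.util.Arrays;

public class wc_hadoop_test {

    @Test
    public void testMap() throws Exception {
        // Feed one line to the mapper and assert the (word, 1) pairs it emits, in order
        MapDriver<LongWritable, Text, Text, IntWritable> driver =
                MapDriver.newMapDriver(new wc_hadoop.mymap());
        driver.withInput(new LongWritable(0), new Text("hello hadoop hello"))
              .withOutput(new Text("hello"), new IntWritable(1))
              .withOutput(new Text("hadoop"), new IntWritable(1))
              .withOutput(new Text("hello"), new IntWritable(1))
              .runTest();
    }

    @Test
    public void testReduce() throws Exception {
        // Feed one key with its grouped values and assert the summed count
        ReduceDriver<Text, IntWritable, Text, IntWritable> driver =
                ReduceDriver.newReduceDriver(new wc_hadoop.myreduce());
        driver.withInput(new Text("hello"), Arrays.asList(new IntWritable(1), new IntWritable(1)))
              .withOutput(new Text("hello"), new IntWritable(2))
              .runTest();
    }
}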
2. Running on Hadoop as a jar: it is essentially the same code; just change the parts marked in red (in the original notes) and then package it.
Note: if you created the project as a Scala project, pay attention to the packaging directory specified in the pom file; commenting it out is enough.
Packaging command: mvn clean package
How to run the jar:
1. Package it.
Run the package (a typical command is shown below).
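The exact command depends on your own jar name and HDFS paths; a typical invocation (the jar name and paths here are hypothetical) looks like this:

hadoop jar wc_hadoop-1.0-SNAPSHOT.jar com.itstar.wc_hadoop /user/hadoop/wc/input /user/hadoop/wc/output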
Result:
Running Spark locally and on the server:
1. Test in spark-shell (a minimal example follows):
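A minimal word-count run inside spark-shell might look like the following (the input path is hypothetical; spark-shell already provides the sc context, so no setup code is needed):

scala> sc.textFile("hdfs:///user/hadoop/wc/input").flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).collect().foreach(println)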
Test method for submitting a jar:
The Spark code written in Java:
package com.itstar;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class wordCount_java {
    public static void main(String[] args) {
        if (args.length < 1)
            return;
        SparkConf sc = new SparkConf();
        sc.setAppName("JavaWordCount");
        // sc.setMaster("local");
        JavaSparkContext jsc = new JavaSparkContext(sc);
        JavaRDD<String> file = jsc.textFile(args[0]);
        // Split every line into words
        JavaRDD<String> words = file.flatMap(new FlatMapFunction<String, String>() {
            public Iterator<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" ")).iterator();
            }
        });
        // Map every word to a (word, 1) pair
        JavaPairRDD<String, Integer> maps = words.mapToPair(new PairFunction<String, String, Integer>() {
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
            }
        });
        // Sum the counts for each word
        JavaPairRDD<String, Integer> reduce = maps.reduceByKey(new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer a, Integer b) throws Exception {
                return a + b;
            }
        });
        // Collect the results to the driver and print them
        List<Tuple2<String, Integer>> result = reduce.collect();
        for (Tuple2<String, Integer> t : result) {
            System.out.println(t._1 + " " + t._2);
        }
        jsc.stop();
    }
}
Test method with spark-submit:
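A typical submission looks like the following (the class name matches the code above; the jar name, master URL, and input path are hypothetical and should be replaced with your own):

spark-submit --class com.itstar.wordCount_java --master spark://master:7077 wordcount-1.0-SNAPSHOT.jar hdfs:///user/hadoop/wc/input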
Result:
Running the Scala version:
Code (Scala version):
package com.itstar

import org.apache.spark.{SparkConf, SparkContext}

object wordCount_scala {
  def main(args: Array[String]): Unit = {
    if (args.length < 1)
      return
    val s = new SparkConf()
      // .setMaster("local")
      .setAppName("ScalaWordCount")
    val sc = new SparkContext(s)
    // Split lines into words, map each word to (word, 1), sum per word, and bring the result to the driver
    val reduce = sc.textFile(args(0)).flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey(_ + _).collect()
    reduce.foreach(println)
    sc.stop()
  }
}
Result:
Testing Spark locally:
Just set the master to local; no other configuration is needed (see the snippet below).
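In the Java code above this simply means enabling the commented-out setMaster line; a minimal sketch of the relevant lines (the master string is the only change):

SparkConf sc = new SparkConf();
sc.setAppName("JavaWordCount");
// Run in-process inside the IDE, no cluster needed; "local[*]" uses all local cores, plain "local" uses one thread
sc.setMaster("local[*]");
JavaSparkContext jsc = new JavaSparkContext(sc);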
Note:
If you get this error (shown in the original screenshot), remove the hidden part indicated there.
If you get the winutils error, Spark can simply ignore it, because it does not affect the result.
If you do not want to see the error, follow step 1 of the local-run setup above:
putting winutils.exe under the Hadoop bin directory is enough.