Hands-on Hadoop MapReduce code: wordcount (wc), actcount, actcount2, common, mapjoin, mpcombine, partition (part), reducejoin


Maven dependencies:

<dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>2.6.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-common -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-common</artifactId>
      <version>2.6.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.6.0</version>
    </dependency>
  </dependencies>

A helper class used by the later drivers: if the output directory already exists, delete it and everything inside it.

 

package com.myh.common;

import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;

//check whether the output directory exists; if it does, delete it

public class DelLocalDriectory {
    public static void del(String path) throws IOException {
        try {
            File file = new File(path);
            if (file.exists()) {
                FileUtils.forceDelete(file);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

    }
}
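The drivers in the later examples call this helper right before building the Job, so a re-run does not fail because the output directory already exists, for example:

DelLocalDriectory.del("e:/res4");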

Example 1: the simplest WordCount. Take a piece of text, split each line on a single space, and count how many times each string appears.

One problem I ran into: leaving a slash out of the input/output URI (e.g. file://e:/word.txt instead of file:///e:/word.txt) produces a strange error and nothing is written to the output.

The main (driver) class:

package com.myh.wc;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class WcDemo {
    public static void main(String[] args) throws Exception {
        Configuration cfg = new Configuration();
//        cfg.set("mapreduce.framework.name","yarn");
        //prepare an empty job
        Job job = Job.getInstance(cfg);
        //the class that contains the job's entry point
        job.setJarByClass(WcDemo.class);
        //give the job a name
        job.setJobName("wc");
        //input file path
        FileInputFormat.addInputPath(job, new Path("file:///e:/word.txt"));
        //output directory path
        FileOutputFormat.setOutputPath(job, new Path("file:///e:/res1"));
        //plug your mapper code into the MapReduce pipeline
        job.setMapperClass(WcMapper.class);
        //types the mapper emits
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        //plug your reducer code into the MapReduce pipeline
        job.setReducerClass(WcReduce.class);
        //types the reducer emits
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        //start the whole pipeline and wait for it to finish
        job.waitForCompletion(true);

    }
}
package com.myh.wc;


import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Processes one input split.
 * Mapper<LongWritable, Text, Text, IntWritable>
 * The first two types are the input (a local/HDFS file): 1. the offset of the line in the file 2. the line's text
 * The last two types are the output written to the circular (in-memory) buffer: 1. key 2. value
 */
public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        //split the line into individual words
        String[] words = value.toString().split(" ");
        //write each word with a count of 1 to the circular buffer
        for (String word : words) {
            context.write(new Text(word), one);
        }

    }

}
package com.myh.wc;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/16  0:39
 */
public class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        //add up the 1s in this group
        int num = 0;
        for (IntWritable i : values) {
            num += i.get();
        }
        //write the word and its total to the output file
        context.write(key, new IntWritable(num));
    }
}

Contents of the word.txt file used:

i hadoop
sasa dsa
i haddoop
i haddoop
i haddoop
i haddoopi haddoopi
i haddoop
i haddoop
i haddoop
i haddoop
i haddoopi haddoopi
i haddoopi haddoop
i haddoop
i haddoop
i haddoopi haddoopi
i haddoopi haddoop
i haddoop
i haddoop
i haddoopi haddoopi
i haddoop

Output:
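As a quick sanity check of what the job should produce, the following plain-Java sketch (my own addition; the class name WcLocalCheck and its placement are illustrative) applies the same split-on-space and summing logic to e:/word.txt and prints word/count pairs in the same tab-separated form that TextOutputFormat writes:

package com.myh.wc;

import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Map;
import java.util.TreeMap;

public class WcLocalCheck {
    public static void main(String[] args) throws Exception {
        Map<String, Integer> counts = new TreeMap<>();
        for (String line : Files.readAllLines(Paths.get("e:/word.txt"))) {
            //same split as WcMapper
            for (String word : line.split(" ")) {
                //same summing as WcReduce
                counts.merge(word, 1, Integer::sum);
            }
        }
        //TextOutputFormat writes key \t value, one pair per line
        counts.forEach((word, cnt) -> System.out.println(word + "\t" + cnt));
    }
}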

Example 2: per-user action counts, keyed by user id plus action name.

package com.njbdqn.actcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AcDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(AcDemo.class);
        job.setJobName("ac");
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.100.195:9000/logs/202001"));
        FileOutputFormat.setOutputPath(job, new Path("file:///e:/res1"));
        job.setMapperClass(AcMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserAction.class);
        job.setReducerClass(AcReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.waitForCompletion(true);
    }
}

 

package com.njbdqn.actcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class AcMapper extends Mapper<LongWritable, Text,Text,UserAction> {


    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //read one log line, e.g. amd,16G,2.7GHz,2020-01-02#00:06:24,COLLECT,400283,goods.html,228778,FIREFOX,90
        String[] infos = value.toString().split(",");
        //build the output key: userid + action type
        String outKey = infos[5]+infos[4];
        //build the output value
        UserAction ua = new UserAction(outKey,1);
        //hand the pair to the circular buffer
        context.write(new Text(outKey),ua);
    }
}
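To make the key layout concrete, here is a tiny standalone snippet (my own illustration, not from the original post; the class name AcKeyDemo is made up) showing what the sample log line from the comment above turns into:

package com.njbdqn.actcount;

public class AcKeyDemo {
    public static void main(String[] args) {
        //the sample log line from the AcMapper comment
        String line = "amd,16G,2.7GHz,2020-01-02#00:06:24,COLLECT,400283,goods.html,228778,FIREFOX,90";
        String[] infos = line.split(",");
        //same key construction as AcMapper: userid + action type
        System.out.println(infos[5] + infos[4]);   //prints 400283COLLECT
    }
}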
package com.njbdqn.actcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class AcReduce extends Reducer<Text,UserAction,Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<UserAction> values, Context context) throws IOException, InterruptedException {
        //the groups arrive already keyed by userid + action type (e.g. 400283CLICK, 400283COLLECT); sum the actions in each group
        int num=0;
        for (UserAction ua:values) {
            num+=ua.getCnt();
        }
        context.write(key,new IntWritable(num));
    }
}
package com.njbdqn.actcount;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Custom type used as the map output value; it must implement Hadoop's Writable serialization
 */
public class UserAction implements WritableComparable<UserAction> {
    private String useridAndAct;
    private int cnt;


    public UserAction() {
    }


    public UserAction(String useridAndAct, int cnt) {
        this.useridAndAct = useridAndAct;
        this.cnt = cnt;
    }

    @Override
    public int compareTo(UserAction o) {
        return 0;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(useridAndAct);
        out.writeInt(cnt);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.useridAndAct = in.readUTF();
        this.cnt = in.readInt();
    }

    public String getUseridAndAct() {
        return useridAndAct;
    }

    public void setUseridAndAct(String useridAndAct) {
        this.useridAndAct = useridAndAct;
    }

    public int getCnt() {
        return cnt;
    }

    public void setCnt(int cnt) {
        this.cnt = cnt;
    }
}
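One thing worth checking with any hand-written Writable: write() and readFields() must handle the fields in exactly the same order, or the data comes back scrambled after the shuffle. A minimal round-trip check using only JDK streams (my own sketch; the class name UserActionRoundTrip is made up):

package com.njbdqn.actcount;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class UserActionRoundTrip {
    public static void main(String[] args) throws Exception {
        //serialize a UserAction the same way Hadoop would
        UserAction in = new UserAction("400283COLLECT", 1);
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        in.write(new DataOutputStream(bytes));

        //deserialize into a fresh instance and print the recovered fields
        UserAction out = new UserAction();
        out.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(out.getUseridAndAct() + " / " + out.getCnt());   //400283COLLECT / 1
    }
}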

Result:

Example 3: show the counts horizontally, one row per user (browse, click, collect, buy).

package com.myh.actcount2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class Ac2Demo {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(Ac2Demo.class);
        job.setJobName("ac2");
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.100.195:9000/logs/202001"));
        FileOutputFormat.setOutputPath(job, new Path("file:///e:/res2"));
        job.setMapperClass(Ac2Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserAction.class);
        job.setReducerClass(Ac2Reduce.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(UserAction.class);
        job.waitForCompletion(true);
    }
}

 

package com.myh.actcount2;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17  0:15
 */
public class Ac2Mapper extends Mapper<LongWritable, Text, Text, UserAction> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //read one log line, e.g. amd,16G,2.7GHz,2020-01-02#00:06:24,COLLECT,400283,goods.html,228778,FIREFOX,90
        String[] infos = value.toString().split(",");
        UserAction ua = new UserAction();
        ua.setUserid(infos[5]);
        switch (infos[4]) {
            case "BROWSE":
                ua.setBrowse(1);
                break;
            case "CLICK":
                ua.setClick(1);
                break;
            case "COLLECT":
                ua.setCollect(1);
                break;
            default:
                ua.setBuy(1);
        }
        context.write(new Text(infos[5]),ua);

    }
}
package com.myh.actcount2;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17  0:29
 */
public class Ac2Reduce extends Reducer<Text, UserAction, NullWritable, UserAction> {
    @Override
    protected void reduce(Text key, Iterable<UserAction> values, Context context) throws IOException, InterruptedException {
        //add up the four counters of every UserAction in this group
        int browse = 0, click = 0, buy = 0, collect = 0;
        for (UserAction ua : values) {
            browse += ua.getBrowse();
            click += ua.getClick();
            buy += ua.getBuy();
            collect += ua.getCollect();
        }
        //note the constructor order: (userid, browse, click, collect, buy)
        UserAction out = new UserAction(key.toString(), browse, click, collect, buy);
        context.write(NullWritable.get(), out);

    }
}
package com.myh.actcount2;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17  0:13
 */
public class UserAction implements WritableComparable<UserAction> {
    private String userid;
    private int browse;
    private int click;
    private int collect;
    private int buy;

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public int getBrowse() {
        return browse;
    }

    public void setBrowse(int browse) {
        this.browse = browse;
    }

    public int getClick() {
        return click;
    }

    public void setClick(int click) {
        this.click = click;
    }

    public int getCollect() {
        return collect;
    }

    public void setCollect(int collect) {
        this.collect = collect;
    }

    public int getBuy() {
        return buy;
    }

    public void setBuy(int buy) {
        this.buy = buy;
    }

    public UserAction(String userid, int browse, int click, int collect, int buy) {
        this.userid = userid;
        this.browse = browse;
        this.click = click;
        this.collect = collect;
        this.buy = buy;
    }

    public UserAction() {
    }

    @Override
    public int compareTo(UserAction o) {
        return 0;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(userid);
        out.writeInt(browse);
        out.writeInt(click);
        out.writeInt(collect);
        out.writeInt(buy);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.userid = in.readUTF();
        this.browse = in.readInt();
        this.click = in.readInt();
        this.collect = in.readInt();
        this.buy = in.readInt();
    }
    @Override
    public String toString() {
        return userid+","+browse+","+click+","+collect+","+buy;
    }
}

Result: one line per user in the format userid,browse,click,collect,buy (see UserAction.toString()).

 

Example 4: map-side join (mapjoin)

package com.myh.mapjoin;

import com.myh.common.DelLocalDriectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class UoMapDemo {
    public static void main(String[] args) throws Exception {
        DelLocalDriectory.del("e:/res4");
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(UoMapDemo.class);
        job.setJobName("uo");
        FileInputFormat.addInputPath(job, new Path("e:/data"));//order.txt
        FileOutputFormat.setOutputPath(job, new Path("e:/res4"));
        //ship the small users.txt file (no more than 25 MB) to every mapper node via the distributed cache
        job.addCacheFile(new Path("e:/users.txt").toUri());
        job.setMapperClass(UoMapMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserOrder.class);
        job.setReducerClass(UoMapReduce.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(UserOrder.class);
        job.setNumReduceTasks(2);
        job.waitForCompletion(true);

    }
}
package com.myh.mapjoin;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17  1:12
 */
public class UoMapMapper extends Mapper<LongWritable, Text, Text, UserOrder> {
    Map<String, String> users = new HashMap<>();
    //runs only once, when the map task starts on this node, and never again afterwards (a bit like a static initializer)


    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        //get the files in the distributed cache
        URI[] files = context.getCacheFiles();
        if (null != files && files.length > 0) {
            //path of the cached file
            String path = files[0].getPath();
            //read the cached file with a BufferedReader, split each line, and store userid -> username in the users map
            BufferedReader br = new BufferedReader(new FileReader(path));
            String line = "";
            while ((line = br.readLine()) != null) {//1,zhangsan,40
                String[] us = line.split(",");
                users.put(us[0], us[1]);//{1=>zhangsan,2=>lisi,3=>wangwu}
            }
            br.close();
        }
    }
    //called once for every input record (line)

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //join this order record against the users map to build the UserOrder you want
        String[] split = value.toString().split(",");//1,1,50
        UserOrder uo = new UserOrder();
        uo.setUserid(split[1]);
        uo.setMoney(Integer.parseInt(split[2]));
        uo.setUsername(users.get(split[1]));
        context.write(new Text(split[1]), uo);//{userid:1,username:zs,money:50}
    }
}
package com.myh.mapjoin;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class UoMapReduce extends Reducer<Text, UserOrder, NullWritable, UserOrder> {
    @Override
    protected void reduce(Text key, Iterable<UserOrder> values, Context context) throws IOException, InterruptedException {
        //sum the money for this user; copy the fields into a fresh object because Hadoop reuses the value instance between iterations
        int count = 0;
        UserOrder uoout = new UserOrder();
        for (UserOrder uo : values) {
            count += uo.getMoney();
            if (uoout.getUserid() == null) {
                uoout.setUserid(uo.getUserid());
                uoout.setUsername(uo.getUsername());
            }
        }
        uoout.setMoney(count);
        context.write(NullWritable.get(), uoout);
    }
}
package com.myh.mapjoin;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17  1:09
 */
public class UserOrder implements WritableComparable<UserOrder> {
    private String userid;
    private String username;
    private int money;
    public UserOrder() {
    }

    public UserOrder(String userid, String username, int money) {
        this.userid = userid;
        this.username = username;
        this.money = money;
    }
    @Override
    public int compareTo(UserOrder o) {
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(userid);
        dataOutput.writeUTF(username);
        dataOutput.writeInt(money);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userid= dataInput.readUTF();
        this.username=dataInput.readUTF();
        this.money=dataInput.readInt();
    }
    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public int getMoney() {
        return money;
    }

    public void setMoney(int money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return String.format("%s,%s,%d",userid,username,money);
    }
}

Input files used:
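The original post showed the two input files as screenshots. Based on the sample values in the UoMapMapper comments they look roughly like this (my reconstruction; the column meanings are my reading of the code, not stated in the post):

users.txt  (userid,username,age), e.g.:
1,zhangsan,40

order.txt  (orderid,userid,money), e.g.:
1,1,50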

Result:

 

 

Example 5: map-side combiner (mpcombine)

 

Code:

package com.njbdqn.mpcombiner;

import com.njbdqn.common.DelLocalDriectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class UoMapDemo {
    public static void main(String[] args) throws Exception{
        DelLocalDriectory.del("e:/res6");
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(UoMapDemo.class);
        job.setJobName("uo1");

        FileInputFormat.addInputPath(job,new Path("e:/data"));//order.txt
        FileOutputFormat.setOutputPath(job,new Path("e:/res6"));
        //ship the small users.txt file (no more than 25 MB) to every mapper node via the distributed cache
        job.addCacheFile(new Path("e:/users.txt").toUri());

        job.setMapperClass(UoMapMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserOrder1.class);

//        job.setReducerClass(UoMapReduce.class);
//        job.setOutputKeyClass(NullWritable.class);
//        job.setOutputValueClass(UserOrder.class);
//        run one round of aggregation on each mapper node
//        so the later reduce stage has far less data to pull over the network
        job.setCombinerClass(UoMapReduce.class);


        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(UserOrder1.class);
        //number of reduce tasks working in parallel (the default is 1)
        job.setNumReduceTasks(2);
        job.waitForCompletion(true);
    }

}
package com.njbdqn.mpcombiner;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

public class UoMapMapper extends Mapper<LongWritable, Text,Text, UserOrder1> {
    Map<String,String> users = new HashMap<String,String>();

    //runs only once, when the map task starts on this node, and never again afterwards (a bit like a static initializer)
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        //get the files in the distributed cache
        URI[] files = context.getCacheFiles();
        if(null!=files && files.length>0 ) {
            //path of the cached file
            String path = files[0].getPath();
            //read the cached file with a BufferedReader, split each line, and store userid -> username in the users map
            BufferedReader br = new BufferedReader(new FileReader(path));
            String line="";
            while((line=br.readLine())!=null){//1,zhangsan,40
                String[] us = line.split(",");
                users.put(us[0],us[1]);//{1=>zhangsan,2=>lisi,3=>wangwu}
            }
            br.close();
        }
    }

    //called once for every input record (line)
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //join this order record against the users map to build the UserOrder1 you want
        String[] split = value.toString().split(",");//1,1,50
        UserOrder1 uo = new UserOrder1();
        uo.setUserid(split[1]);
        uo.setMoney(Integer.parseInt(split[2]));
        uo.setUsername(users.get(split[1]));
        context.write(new Text(split[1]),uo);//{userid:1,username:zs,money:50}
    }
}
package com.njbdqn.mpcombiner;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Iterator;

//this Reducer is reused as the combiner; its input and output types both match the map output types (Text, UserOrder1)
public class UoMapReduce extends Reducer<Text, UserOrder1, Text, UserOrder1> {
    @Override
    protected void reduce(Text key, Iterable<UserOrder1> values, Context context) throws IOException, InterruptedException {
        int count=0;
        UserOrder1 uoout = new UserOrder1();
        for (Iterator<UserOrder1> it = values.iterator(); it.hasNext();){
            UserOrder1 uo = it.next();
            count+=uo.getMoney();
            if (uoout.getUserid()==null) {
                uoout.setUserid(uo.getUserid());
                uoout.setUsername(uo.getUsername());
            }
        }
        uoout.setMoney(count);
        System.out.println(key.toString()+"=========>"+uoout);
        context.write(key,uoout);

    }
}
package com.njbdqn.mpcombiner;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class UserOrder1 implements WritableComparable<UserOrder1> {
    private String userid;
    private String username;
    private int money;

    public UserOrder1() {
    }

    public UserOrder1(String userid, String username, int money) {
        this.userid = userid;
        this.username = username;
        this.money = money;
    }

    @Override
    public int compareTo(UserOrder1 o) {
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(userid);
        dataOutput.writeUTF(username);
        dataOutput.writeInt(money);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userid = dataInput.readUTF();
        this.username = dataInput.readUTF();
        this.money = dataInput.readInt();
    }

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public int getMoney() {
        return money;
    }

    public void setMoney(int money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return String.format("%s,%s,%d",userid,username,money);
    }
}

Result:

Example 6: custom partitioner (part)

Code:

package com.myh.part;

import com.myh.common.DelLocalDriectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class OrderDemo {
    public static void main(String[] args) throws Exception {
        DelLocalDriectory.del("e:/res7");
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(OrderDemo.class);
        job.setJobName("partition");
        FileInputFormat.addInputPath(job, new Path("e:/data"));
        FileOutputFormat.setOutputPath(job, new Path("e:/res7"));
        job.setMapperClass(OrderMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Orders.class);
        job.setPartitionerClass(OrderPartitioner.class);
        job.setNumReduceTasks(2);
        job.waitForCompletion(true);

    }
}
package com.myh.part;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;


public class OrderMapper extends Mapper<LongWritable, Text, Text, Orders> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] spt = value.toString().split(",");
        Orders od = new Orders(spt[1], Integer.parseInt(spt[2]));
        context.write(new Text(spt[1]), od);

    }
}

 

package com.myh.part;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17  1:56
 */
public class OrderPartitioner extends Partitioner<Text, Orders> {
    @Override
    public int getPartition(Text text, Orders orders, int i) {
        int num = Integer.parseInt(text.toString());
        if (num <= 2) {
            return 0;
        } else {
            return 1;
        }
    }
}
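With job.setNumReduceTasks(2) in OrderDemo, this partitioner routes all keys whose numeric userid is <= 2 to reducer 0 (output file part-r-00000) and every other key to reducer 1 (part-r-00001). A quick standalone check outside of a job (my own sketch; the class name PartitionCheck is made up, and the Orders value is passed as null because getPartition never looks at it):

package com.myh.part;

import org.apache.hadoop.io.Text;

public class PartitionCheck {
    public static void main(String[] args) {
        OrderPartitioner p = new OrderPartitioner();
        //the last argument is the number of reduce tasks, 2 as in OrderDemo
        System.out.println(p.getPartition(new Text("2"), null, 2));   //0 -> part-r-00000
        System.out.println(p.getPartition(new Text("3"), null, 2));   //1 -> part-r-00001
    }
}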
package com.myh.part;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17  1:53
 */
public class Orders implements WritableComparable<Orders> {
    private String userid;
    private int money;

    public Orders() {
    }

    public Orders(String userid, int money) {
        this.userid = userid;
        this.money = money;
    }



    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public int getMoney() {
        return money;
    }

    public void setMoney(int money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return "Orders{" +
                "userid='" + userid + '\'' +
                ", money=" + money +
                '}';
    }
    @Override
    public int compareTo(Orders o) {
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(userid);
        dataOutput.writeInt(money);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userid = dataInput.readUTF();
        this.money = dataInput.readInt();
    }
}

Result:

Example 7: reduce-side join with per-user totals (reducejoin)

Code:

package com.myh.reducejoin;

import com.myh.common.DelLocalDriectory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @Description:
 * @author:myh
 * @date: 2021/9/17  2:09
 */
public class UoDemo {
    public static void main(String[] args) throws Exception {
        DelLocalDriectory.del("e:/res3");
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(UoDemo.class);
        job.setJobName("uo");
        FileInputFormat.addInputPath(job,new Path("e:/data"));
        FileOutputFormat.setOutputPath(job,new Path("e:/res3"));
        job.setMapperClass(UoMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserOrder.class);
        job.setReducerClass(UoReduce.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(UserOrder.class);
        job.waitForCompletion(true);
    }
}
package com.myh.reducejoin;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;


public class UoMapper extends Mapper<LongWritable, Text, Text, UserOrder> {
        //context is the go-between that connects the Mapper side and the Reducer side of the job

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //which fields get filled depends on which input file the record came from
        //users.txt: 1,zhangsan,40   order.txt: 1,1,50
        String[] infos = value.toString().split(",");
        //get the name of the file this split came from
        String fileName = ((FileSplit) context.getInputSplit()).getPath().toString();
        UserOrder uo = new UserOrder();
        //e:/data/users.txt and e:/data/order.txt both sit in the same input directory here, unlike the map-join example where users.txt was distributed separately
        if (fileName.indexOf("order.txt") != -1) {
            uo.setUserid(infos[1]);
            uo.setMoney(Integer.parseInt(infos[2]));
            uo.setUsername("");
        } else {
            uo.setUserid(infos[0]);
            uo.setUsername(infos[1]);

        }
        context.write(new Text(uo.getUserid()), uo);
    }
}

package com.myh.reducejoin;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class UoReduce extends Reducer<Text, UserOrder, NullWritable, UserOrder> {
    @Override
    protected void reduce(Text key, Iterable<UserOrder> values, Context context) throws IOException, InterruptedException {
        //sum the money in this group and pick up the username from the users.txt record
        String username = "";
        int count = 0;
        for (UserOrder uo : values) {
            count += uo.getMoney();
            //compare string contents, not references: != "" is practically always true in Java
            if (username.isEmpty() && !uo.getUsername().isEmpty()) {
                username = uo.getUsername();
            }
        }
        UserOrder od = new UserOrder(key.toString(), username, count);
        context.write(NullWritable.get(), od);
    }
}
package com.myh.reducejoin;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class UserOrder implements WritableComparable<UserOrder> {
    private String userid;
    private String username;
    private int money;

    public UserOrder() {
    }

    public UserOrder(String userid, String username, int money) {
        this.userid = userid;
        this.username = username;
        this.money = money;
    }

    @Override
    public int compareTo(UserOrder o) {
        return 0;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(userid);
        dataOutput.writeUTF(username);
        dataOutput.writeInt(money);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.userid = dataInput.readUTF();
        this.username = dataInput.readUTF();
        this.money = dataInput.readInt();
    }

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public int getMoney() {
        return money;
    }

    public void setMoney(int money) {
        this.money = money;
    }

    @Override
    public String toString() {
        return String.format("%s,%s,%d",userid,username,money);
    }
}

Input file layout: users.txt and order.txt are both placed under e:/data (see the comment in UoMapper).

 

Result:

Per-user accumulated totals:

 
