Hadoop Learning: Merging Tables in MapReduce Without a Reduce Phase for Better Efficiency


✌✌✌As the ancients said, the palest ink beats the best memory, and a journey of a thousand miles begins with a single step. A thousand lines of code a day and a short daily summary are a must. Aim for the big companies, stay hopeful, and you will be unstoppable, hahaha!!!✌✌✌


一、✌Problem requirements

record table:

ID    City code    Air quality index
001   03           245
002   02           655
003   05           743
004   04           246
005   02           956
006   01           637
007   05           831
008   03           683
009   02           349

city table:

City code    City name
01           长沙
02           株洲
03           湘潭
04           怀化
05           岳阳

Target table:

ID    City name    Air quality index
001   湘潭         245
002   株洲         655
003   岳阳         743
004   怀化         246
005   株洲         956
006   长沙         637
007   岳阳         831
008   湘潭         683
009   株洲         349
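
Because the city table easily fits in memory, the join can be done entirely on the map side: each mapper loads the city table into a HashMap in setup() and fills in the city name as it reads every record, so the job needs no reduce phase and skips the shuffle/sort altogether. A minimal standalone sketch of that lookup logic (plain Java with hypothetical sample values, no Hadoop involved):

import java.util.HashMap;

public class JoinSketch {

    public static void main(String[] args) {

        //The small table, as setup() would cache it: city code -> city name
        HashMap<String, String> city = new HashMap<>();
        city.put("03", "湘潭");

        //One record line, as map() would receive it: ID \t city code \t AQI
        String[] f = "001\t03\t245".split("\t");

        //The join itself is just an in-memory lookup on the city code
        System.out.println(f[0] + "\t" + city.get(f[1]) + "\t" + f[2]);

    }

}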

二、✌Code implementation

1.✌Bean class

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Bean implements Writable {

    private String id;      //record ID
    private String pid;     //city code (the join key)
    private int amount;     //air quality index
    private String pname;   //city name, filled in from the cached city table
    private String type;    //source-table tag; unused in this map-only join

    public Bean() {
        super();
    }

    public Bean(String id, String pid, int amount, String pname, String type) {
        this.id = id;
        this.pid = pid;
        this.amount = amount;
        this.pname = pname;
        this.type = type;
    }

    @Override
    public String toString() {
        return id + "\t" + pname + "\t\t" + amount;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getPid() {
        return pid;
    }

    public void setPid(String pid) {
        this.pid = pid;
    }

    public int getAmount() {
        return amount;
    }

    public void setAmount(int amount) {
        this.amount = amount;
    }

    public String getPname() {
        return pname;
    }

    public void setPname(String pname) {
        this.pname = pname;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(id);
        out.writeUTF(pid);
        out.writeInt(amount);
        out.writeUTF(pname);
        out.writeUTF(type);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readUTF();
        pid = in.readUTF();
        amount = in.readInt();
        pname = in.readUTF();
        type = in.readUTF();
    }

}
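
Hadoop rebuilds each Bean on the receiving side by calling readFields(), so it must read the fields in exactly the order write() emitted them. A quick round-trip check, sketched with Hadoop's DataOutputBuffer/DataInputBuffer helper classes (not part of the job itself):

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class BeanRoundTrip {

    public static void main(String[] args) throws Exception {

        Bean original = new Bean("001", "03", 245, "湘潭", "");

        //Serialize the way Hadoop does when shipping the Bean between tasks
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);

        //Deserialize into an empty Bean; the field order mirrors write()
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        Bean copy = new Bean();
        copy.readFields(in);

        System.out.println(copy);

    }

}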

2.✌Map class

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashMap;

public class Map extends Mapper<LongWritable, Text, Bean, NullWritable> {

    //City code -> city name, loaded once per mapper in setup()
    HashMap<String, String> map = new HashMap<>();

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {

        //Locate the cached city table distributed with the job
        URI[] cacheFiles = context.getCacheFiles();
        String path = cacheFiles[0].getPath();

        //Open a buffered reader over the local copy of the cached file
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path)));

        String line;

        //Each line holds: city code \t city name
        while (StringUtils.isNotEmpty(line = reader.readLine())) {

            String[] words = line.split("\t");

            map.put(words[0], words[1]);

        }

        reader.close();

    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        //Each record line holds: ID \t city code \t AQI
        String line = value.toString();

        String[] words = line.split("\t");

        //Join on the city code: look up the city name in the cached table and emit directly
        context.write(new Bean(words[0], words[1], Integer.parseInt(words[2]), map.get(words[1]), ""), NullWritable.get());

    }
}
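
Because the Driver below sets the number of reduce tasks to zero, whatever map() emits is written straight to the output files: the default TextOutputFormat calls Bean.toString() for the key and, since the value is NullWritable, appends nothing after it. Each mapper therefore produces a part-m-xxxxx file whose first lines should look like this (note the double tab from toString()):

001	湘潭		245
002	株洲		655
003	岳阳		743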

3.✌Driver class

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;


public class Driver {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {

        //Hard-coded local paths for debugging; drop this line to use the command-line arguments
        args = new String[]{"D:/input/inputword", "D:/output"};

        BasicConfigurator.configure();

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);

        job.setJarByClass(Driver.class);
        job.setMapperClass(Map.class);

        //Ship the small city table to every mapper via the distributed cache
        job.addCacheFile(new URI("file:///D:/input/inputcache/pd.txt"));

        //Map-only job: zero reduce tasks means no shuffle and no sort
        job.setNumReduceTasks(0);

        job.setOutputKeyClass(Bean.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);

    }

}
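
The paths above point at the local Windows file system, which only works for a local debug run. On a real cluster the record files and the cached city table would live on HDFS instead; a hypothetical variant of the relevant Driver lines (the namenode address and paths are placeholders):

        //City table uploaded to HDFS beforehand; each node pulls a local copy before its mappers start
        job.addCacheFile(new URI("hdfs://namenode:8020/cache/pd.txt"));

        //Input and output directories on HDFS, taken from the command line
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        //Then package the classes into a jar and submit, e.g.:
        //  hadoop jar join.jar Driver /input/record /output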
