Hadoop MapReduce with the Java API: A Traffic-Aggregation Example

This post shows how to implement a Hadoop MapReduce job in Java: a custom data type FlowBean, the Mapper and Reducer classes, and the driver class FlowSunRuner, which together aggregate the upstream and downstream traffic for each phone number.

Custom data type: FlowBean

package org.hadoop.total;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/*
 * FlowBean is shipped between nodes, so it must follow Hadoop's
 * serialization mechanism and implement the Writable interface.
 */
public class FlowBean implements Writable {

    // downstream traffic
    private long down_flow;
    // upstream traffic
    private long up_flow;
    // total traffic
    private long total;
    // phone number
    private String phone;

    // Hadoop instantiates the bean via reflection, so once any other
    // constructor exists, this no-arg constructor must be written out.
    public FlowBean() {
    }

    public FlowBean(String phone, long up_flow, long down_flow) {
        this.phone = phone;
        this.up_flow = up_flow;
        this.down_flow = down_flow;
        this.total = up_flow + down_flow;
    }

    public long getDown_flow() {
        return down_flow;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public long getTotal() {
        return total;
    }

    // Serialize the fields to the output stream.
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phone);
        out.writeLong(up_flow);
        out.writeLong(down_flow);
        out.writeLong(total);
    }

    // Deserialize the fields; they must be read in exactly the
    // order they were written.
    public void readFields(DataInput in) throws IOException {
        phone = in.readUTF();
        up_flow = in.readLong();
        down_flow = in.readLong();
        total = in.readLong();
    }

    @Override
    public String toString() {
        return "" + up_flow + "-" + down_flow + "-" + total;
    }
}
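Because write() and readFields() must stay symmetric, a quick way to sanity-check the bean is an in-memory round trip. Below is a minimal sketch; the class name FlowBeanRoundTrip and the sample values are made up for illustration and are not part of the original post.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean("13726230503", 2481, 24681);

        // Serialize the bean into an in-memory byte array.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance, as Hadoop would on the reduce side.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy); // expected: 2481-24681-27162
    }
}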

The Mapper

package org.hadoop.total;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/*
 * Input key:    LongWritable, byte offset of the line within the file
 * Input value:  Text, the text of the line
 * Output key:   Text, the phone number
 * Output value: FlowBean
 */
public class FlowSunMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    // The framework calls map() once per input line: key is the input
    // key, value holds the line, and context collects the output.
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString(); // one line of input
        String[] arr = StringUtils.split(line, "\t");
        String phonenum = arr[1];
        long u_flow = Long.parseLong(arr[7]);
        long d_flow = Long.parseLong(arr[8]);
        // Emit the map output through the context.
        context.write(new Text(phonenum), new FlowBean(phonenum, u_flow, d_flow));
    }
}
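To see what the mapper emits, here is a standalone sketch that feeds one hypothetical input line through the same parsing logic. The tab-separated field layout (phone number in column 1, upstream and downstream byte counts in columns 7 and 8) is inferred from the array indices above; the real layout of HTTP_20130313143750.dat may differ.

import org.apache.commons.lang.StringUtils;

public class MapperParseDemo {
    public static void main(String[] args) {
        // Nine tab-separated fields; only indices 1, 7, and 8 are used.
        String line = "1363157985066\t13726230503\t00-FD-07-A4-72-B8:CMCC\t"
                + "120.196.100.82\ti02.c.aliimg.com\t24\t27\t2481\t24681";
        String[] arr = StringUtils.split(line, "\t");
        System.out.println("key  = " + arr[1]);                 // 13726230503
        System.out.println("up   = " + Long.parseLong(arr[7])); // 2481
        System.out.println("down = " + Long.parseLong(arr[8])); // 24681
    }
}

For that line the mapper would emit the key "13726230503" paired with FlowBean("13726230503", 2481, 24681).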

The Reducer

package org.hadoop.total;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowSunReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    // key is the phone number; values is the group of beans emitted
    // for that key, i.e. {flowbean, flowbean, flowbean, ...}
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        long down_flow_count = 0;
        long up_flow_count = 0;
        for (FlowBean bean : values) {
            up_flow_count += bean.getUp_flow();
            down_flow_count += bean.getDown_flow();
        }
        context.write(key, new FlowBean(key.toString(), up_flow_count, down_flow_count));
    }
}
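Since this reducer only sums values, and its input and output value types are both FlowBean, it can also serve as a combiner to pre-aggregate map output locally and cut shuffle traffic. This is an optional addition, not part of the original post; it would be a single line in the driver's run() method shown in the next section:

// Reuse the reducer as a combiner: valid here because the value types
// match and summing is associative and commutative.
job.setCombinerClass(FlowSunReducer.class);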

The driver (main method)

package org.hadoop.total;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class FlowSunRuner extends Configured implements Tool {

    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop101:9000/");

        // Create a job.
        Job job = Job.getInstance(conf);
        job.setJarByClass(FlowSunRuner.class);

        // Set the mapper.
        job.setMapperClass(FlowSunMapper.class);
        // Set the reducer.
        job.setReducerClass(FlowSunReducer.class);

        // Map output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        // Final output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input path.
        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.117.101:9000/HTTP_20130313143750.dat"));
        // Output path (must not already exist).
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.117.101:9000/out/test/"));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Propagate the job's exit status to the shell.
        System.exit(ToolRunner.run(new Configuration(), new FlowSunRuner(), args));
    }
}
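The paths above are hardcoded, so any command-line arguments are ignored. A more flexible variant (a sketch; the convention that args[0] is the input path and args[1] the output path is assumed here, not taken from the original post) would replace the two hardcoded calls with:

// Take input/output locations from the command line instead of
// hardcoding them (args[0] = input, args[1] = output -- an assumed
// convention for this sketch).
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

The job could then be launched with, for example, hadoop jar flowsum.jar org.hadoop.total.FlowSunRuner /HTTP_20130313143750.dat /out/test (the jar name is assumed).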


Source: https://blog.csdn.net/weixin_37275456/article/details/83344146
