Hadoop MapReduce with the Java API: A Traffic-Aggregation Example

This post shows how to implement a Hadoop MapReduce job in Java: a custom data type FlowBean, the Mapper and Reducer classes, and the driver class FlowSunRuner, which together aggregate the upstream and downstream traffic for each phone number.

Custom data type: FlowBean

package org.hadoop.total;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/*
 * FlowBean is shipped between nodes, so it must follow Hadoop's
 * serialization mechanism and implement the Writable interface.
 */
public class FlowBean implements Writable {

    // downstream traffic
    private long down_flow;
    // upstream traffic
    private long up_flow;
    // total traffic
    private long total;
    // phone number
    private String phone;

    // Hadoop instantiates the bean via reflection, so once any other
    // constructor exists, this no-arg constructor must be written out.
    public FlowBean() {
    }

    public FlowBean(String phone, long up_flow, long down_flow) {
        this.phone = phone;
        this.up_flow = up_flow;
        this.down_flow = down_flow;
        this.total = up_flow + down_flow;
    }

    public long getDown_flow() {
        return down_flow;
    }

    public long getUp_flow() {
        return up_flow;
    }

    public long getTotal() {
        return total;
    }

    // Serialize the fields to the output stream.
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phone);
        out.writeLong(up_flow);
        out.writeLong(down_flow);
        out.writeLong(total);
    }

    // Deserialize the fields; they must be read in exactly the
    // order they were written.
    public void readFields(DataInput in) throws IOException {
        phone = in.readUTF();
        up_flow = in.readLong();
        down_flow = in.readLong();
        total = in.readLong();
    }

    @Override
    public String toString() {
        return "" + up_flow + "-" + down_flow + "-" + total;
    }
}
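Because write() and readFields() must stay symmetric, a quick way to sanity-check the bean is an in-memory round trip. Below is a minimal sketch; the class name FlowBeanRoundTrip and the sample values are made up for illustration and are not part of the original post.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean("13726230503", 2481, 24681);

        // Serialize the bean into an in-memory byte array.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance, as Hadoop would on the reduce side.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy); // expected: 2481-24681-27162
    }
}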

The Mapper

package org.hadoop.total;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/*
 * Input key:    LongWritable, byte offset of the line within the file
 * Input value:  Text, the text of the line
 * Output key:   Text, the phone number
 * Output value: FlowBean
 */
public class FlowSunMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    // The framework calls map() once per input line: key is the input
    // key, value holds the line, and context collects the output.
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString(); // one line of input
        String[] arr = StringUtils.split(line, "\t");
        String phonenum = arr[1];
        long u_flow = Long.parseLong(arr[7]);
        long d_flow = Long.parseLong(arr[8]);
        // Emit the map output through the context.
        context.write(new Text(phonenum), new FlowBean(phonenum, u_flow, d_flow));
    }
}
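To see what the mapper emits, here is a standalone sketch that feeds one hypothetical input line through the same parsing logic. The tab-separated field layout (phone number in column 1, upstream and downstream byte counts in columns 7 and 8) is inferred from the array indices above; the real layout of HTTP_20130313143750.dat may differ.

import org.apache.commons.lang.StringUtils;

public class MapperParseDemo {
    public static void main(String[] args) {
        // Nine tab-separated fields; only indices 1, 7, and 8 are used.
        String line = "1363157985066\t13726230503\t00-FD-07-A4-72-B8:CMCC\t"
                + "120.196.100.82\ti02.c.aliimg.com\t24\t27\t2481\t24681";
        String[] arr = StringUtils.split(line, "\t");
        System.out.println("key  = " + arr[1]);                 // 13726230503
        System.out.println("up   = " + Long.parseLong(arr[7])); // 2481
        System.out.println("down = " + Long.parseLong(arr[8])); // 24681
    }
}

For that line the mapper would emit the key "13726230503" paired with FlowBean("13726230503", 2481, 24681).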

The Reducer

package org.hadoop.total;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowSunReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    // key is the phone number; values is the group of beans emitted
    // for that key, i.e. {flowbean, flowbean, flowbean, ...}
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        long down_flow_count = 0;
        long up_flow_count = 0;
        for (FlowBean bean : values) {
            up_flow_count += bean.getUp_flow();
            down_flow_count += bean.getDown_flow();
        }
        context.write(key, new FlowBean(key.toString(), up_flow_count, down_flow_count));
    }
}
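Since this reducer only sums values, and its input and output value types are both FlowBean, it can also serve as a combiner to pre-aggregate map output locally and cut shuffle traffic. This is an optional addition, not part of the original post; it would be a single line in the driver's run() method shown in the next section:

// Reuse the reducer as a combiner: valid here because the value types
// match and summing is associative and commutative.
job.setCombinerClass(FlowSunReducer.class);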

The driver (main method)

package org.hadoop.total;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class FlowSunRuner extends Configured implements Tool {

    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop101:9000/");

        // Create a job.
        Job job = Job.getInstance(conf);
        job.setJarByClass(FlowSunRuner.class);

        // Set the mapper.
        job.setMapperClass(FlowSunMapper.class);
        // Set the reducer.
        job.setReducerClass(FlowSunReducer.class);

        // Map output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        // Final output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input path.
        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.117.101:9000/HTTP_20130313143750.dat"));
        // Output path (must not already exist).
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.117.101:9000/out/test/"));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Propagate the job's exit status to the shell.
        System.exit(ToolRunner.run(new Configuration(), new FlowSunRuner(), args));
    }
}
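The paths above are hardcoded, so any command-line arguments are ignored. A more flexible variant (a sketch; the convention that args[0] is the input path and args[1] the output path is assumed here, not taken from the original post) would replace the two hardcoded calls with:

// Take input/output locations from the command line instead of
// hardcoding them (args[0] = input, args[1] = output -- an assumed
// convention for this sketch).
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

The job could then be launched with, for example, hadoop jar flowsum.jar org.hadoop.total.FlowSunRuner /HTTP_20130313143750.dat /out/test (the jar name is assumed).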


Source: https://blog.csdn.net/weixin_37275456/article/details/83344146
