Hadoop Hands-On Learning (3) - Phone Traffic Statistics

First, prepare a txt file with the following content:

13726230501 200 1100
13396230502 300 1200
13897230503 400 1300
13897230503 100 300
13597230534 500 1400
13597230534 300 1200

Column 1 is the phone number, column 2 is the upstream traffic, and column 3 is the downstream traffic.

Requirement:

For each user, accumulate the upstream and downstream traffic and compute the total.

For example, 13897230503 above has two records, so the two records must be accumulated and totaled, yielding:

13897230503,500,1600,2100


Implementation approach:

map

It receives one line of the log: the key is the byte offset of the line within the file, and the value is the content of the line.


On output, the key should be the phone number, and the value should be a single object that bundles the upstream traffic, downstream traffic, and total traffic.


The phone number is a string (Text), but the bundled value cannot be represented by a basic type: we need to define a custom bean class, and it must be serializable (in Hadoop terms, it must implement the Writable interface).


key: 13897230503

value: < upFlow:100, downFlow:300, sumFlow:400 >


reduce

It receives a key identifying one phone number, together with the collection of bean objects recorded for that number.


For example:

key:

13897230503


value:

< upFlow:400, downFlow:1300, sumFlow:1700 >,

< upFlow:100, downFlow:300, sumFlow:400 >


Iterate over the bean collection, accumulate each field, and build a new bean, for example:

< upFlow:400+100, downFlow:1300+300, sumFlow:1700+400 >


Finally, output:

key: 13897230503

value: < upFlow:500, downFlow:1600, sumFlow:2100 >


Code implementation:

Create a new Maven project; its pom.xml is as follows:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.gd.hadoop.flowcount</groupId>
  <artifactId>flowcount</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>flowcount</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>commons-beanutils</groupId>
      <artifactId>commons-beanutils</artifactId>
      <version>1.7.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-common</artifactId>
      <version>2.7.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>2.7.3</version>
    </dependency>
    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <version>1.8</version>
      <scope>system</scope>
      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
  </dependencies>
  <build>
      <plugins>
         <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
               <source>1.8</source>
               <target>1.8</target>
            </configuration>
         </plugin>
      </plugins>
  </build>
  
</project>

The Java code is as follows:

The FlowBean class:

package com.gd.hadoop.flowcount;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class FlowBean implements Writable {

	private long upFlow;

	private long downFlow;

	private long sumFlow;

	public FlowBean() {
		// A no-argument constructor is required so that Hadoop can
		// instantiate the bean during deserialization.
	}

	public FlowBean(long upFlow, long downFlow) {
		this.upFlow = upFlow;
		this.downFlow = downFlow;
		// The total is derived once at construction time.
		this.sumFlow = upFlow + downFlow;
	}

	public long getUpFlow() {
		return upFlow;
	}

	public void setUpFlow(long upFlow) {
		this.upFlow = upFlow;
	}

	public long getDownFlow() {
		return downFlow;
	}

	public void setDownFlow(long downFlow) {
		this.downFlow = downFlow;
	}

	public long getSumFlow() {
		return sumFlow;
	}

	public void setSumFlow(long sumFlow) {
		this.sumFlow = sumFlow;
	}

	// Deserialization: the fields must be read in exactly the order
	// they were written in write().
	@Override
	public void readFields(DataInput in) throws IOException {
		upFlow = in.readLong();
		downFlow = in.readLong();
		sumFlow = in.readLong();
	}

	// Serialization: write the fields in a fixed order.
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeLong(upFlow);
		out.writeLong(downFlow);
		out.writeLong(sumFlow);
	}

	// toString() controls how the value is rendered in the job's output files.
	@Override
	public String toString() {
		return upFlow + "\t" + downFlow + "\t" + sumFlow;
	}
	
}
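
Before wiring the bean into a job, it can help to sanity-check that write() and readFields() are symmetric. Here is a minimal standalone sketch; the test class name is invented for illustration and is not part of the job itself:

package com.gd.hadoop.flowcount;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class FlowBeanRoundTrip {

	public static void main(String[] args) throws Exception {
		FlowBean original = new FlowBean(100, 300);

		// Serialize the bean the same way Hadoop would, via write(DataOutput).
		ByteArrayOutputStream bytes = new ByteArrayOutputStream();
		original.write(new DataOutputStream(bytes));

		// Deserialize into a fresh instance via readFields(DataInput).
		FlowBean copy = new FlowBean();
		copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

		// Both lines should print "100	300	400".
		System.out.println(original);
		System.out.println(copy);
	}
}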

The FlowCount class:

package com.gd.hadoop.flowcount;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowCount {

	static class MyMapper extends Mapper<LongWritable, Text, Text, FlowBean>
	{

		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// Each input line looks like "13726230501 200 1100":
			// phone number, upstream traffic, downstream traffic.
			String line = value.toString();
			String[] words = line.split(" ");
			// Emit the phone number as the key and a bean holding the
			// traffic figures as the value.
			context.write(new Text(words[0]), new FlowBean(Long.parseLong(words[1]), Long.parseLong(words[2])));
		}
		
	}
	
	static class MyReducer extends Reducer<Text, FlowBean, Text, FlowBean>
	{

		@Override
		protected void reduce(Text key, Iterable<FlowBean> values, Context context)
				throws IOException, InterruptedException {
			// Accumulate the upstream and downstream traffic across all
			// records for this phone number.
			long sumUpFlow = 0;
			long sumDownFlow = 0;
			for (FlowBean flowBean : values) {
				sumUpFlow += flowBean.getUpFlow();
				sumDownFlow += flowBean.getDownFlow();
			}
			// The FlowBean constructor derives the total from the two sums.
			context.write(key, new FlowBean(sumUpFlow, sumDownFlow));
		}
		
	}
	public static void main(String[] args) throws Exception {
		// Create the configuration object.
		Configuration configuration = new Configuration();

		// Create the Job object.
		Job job = Job.getInstance(configuration, "FlowCount");

		// Tell Hadoop which jar to ship by locating the jar containing this class.
		job.setJarByClass(FlowCount.class);

		// Specify the Mapper and Reducer classes for this job.
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReducer.class);

		// Specify the key-value types of the Mapper output.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(FlowBean.class);

		// Specify the key-value types of the final output.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);

		// Input and output paths come from the command line; the output
		// directory must not exist before the job runs.
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		boolean flag = job.waitForCompletion(true);
		if (flag) {
			System.out.println("completed successfully");
		} else {
			System.out.println("completed with errors");
		}
	}

}
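
One optional refinement: because summing traffic is associative and MyReducer consumes and emits the same (Text, FlowBean) pairs, the same class can also be registered as a combiner in main() to pre-aggregate records on the map side and shrink the shuffle:

		// Optional: pre-aggregate map output locally; safe because the sums are associative.
		job.setCombinerClass(MyReducer.class);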

Then export the project as a jar named flowcount.jar.
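
If you build with Maven instead of exporting from the IDE, the following should produce an equivalent jar (the rename is only to match the name used below):

mvn clean package
mv target/flowcount-0.0.1-SNAPSHOT.jar flowcount.jar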

Next, upload the jar and the txt file to the home directory on the cluster's master node, and put the txt file into the chosen HDFS input directory.
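
For example, assuming the data file is named flow.txt (the file name and the HDFS paths below are placeholders; adapt them to your cluster):

hdfs dfs -mkdir -p /flowcount/input
hdfs dfs -put flow.txt /flowcount/input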

Run:
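
A typical invocation, reusing the placeholder paths above (the fully qualified class name matches the code; the output directory must not already exist):

hadoop jar flowcount.jar com.gd.hadoop.flowcount.FlowCount /flowcount/input /flowcount/output
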
Once "completed successfully" is printed, run the following command to view the result:
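
With the default single reducer, the result lands in one part file. Using the placeholder output path from above:

hdfs dfs -cat /flowcount/output/part-r-00000

For the sample data at the top of this post, the output (tab-separated: phone number, upstream, downstream, total) should be:

13396230502	300	1200	1500
13597230534	800	2600	3400
13726230501	200	1100	1300
13897230503	500	1600	2100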