/**
* Copyright @ 2018 Truemen Tech Co. Ltd.
* All rights reserved.
* @author: rk
* date: 2018-02-22
*/
package com.weibo.report.module;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* **
* 需求分析-微博发帖用户归属地分析
1.分析企业微博账户,所有粉丝的地域归属地,按周统计数据
2.生成结果字段:地域ID,地域名称,粉丝数量,日期
3.页面展示时需要计算用户选定的时间范围的企业微博用户归属地情况
4.分别通过HDFS存储
*
*/
public class RegionalAnalysis extends Configured implements Tool{
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.out.println("Usage:RegionalAnalysis <inputfile path> <outputfile path>");
System.exit(2);
}
int exit = ToolRunner.run(new RegionalAnalysis(), args);
System.exit(exit);
}
public int run(String[] arg0) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(RegionalAnalysis.class);
job.setMapperClass(RegionalAnalysisMapper.class);
job.setReducerClass(RegionalAnalysisReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
FileInputFormat.setInputPaths(job, arg0[0]);
Path outputPath = new Path(arg0[1]);
FileSystem fs = FileSystem.get(conf);
if(fs.exists(outputPath)){
fs.delete(outputPath, true);
}
FileOutputFormat.setOutputPath(job, outputPath);
job.waitForCompletion(true);
return 0;
}
}
Mapper类的创建
/**
* Copyright @ 2018 Truemen Tech Co. Ltd.
* All rights reserved.
* @author: rk
* date: 2018-02-22
*
*/
package com.weibo.report.module;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper of the regional-analysis job.
 *
 * <p>Each input line is split on commas and a composite key is built from the
 * second-to-last field, the last field, the first field and the third field, in that
 * order, joined by commas. According to the original author's note these are the region
 * ID, region name, Weibo creation time and Weibo ID. Every key is emitted with the
 * value 1.
 *
 * @author rk
 */
public class RegionalAnalysisMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    /** Field separator of both the input line and the composite output key. */
    private static final String SEPARATOR = ",";

    /**
     * Fewest fields for which the four extracted positions (0, 2, length-2, length-1)
     * are all valid and distinct; shorter lines are treated as malformed.
     */
    private static final int MIN_FIELDS = 5;

    /** Counter group and name used to report skipped lines. */
    private static final String COUNTER_GROUP = "RegionalAnalysis";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    /** Writables reused across calls to avoid per-record allocation. */
    private final Text outputKey = new Text();
    private final LongWritable outputValue = new LongWritable(1L);

    /**
     * Emits {@code (field[n-2],field[n-1],field[0],field[2], 1)} for one input line.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one comma-separated input line
     * @param context task context used to emit the pair and to count malformed lines
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] val = value.toString().split(SEPARATOR);

        // A blank or truncated line would otherwise raise ArrayIndexOutOfBoundsException
        // and fail the whole task; skip it and make the skip visible through a counter.
        if (val.length < MIN_FIELDS) {
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1L);
            return;
        }

        // output example ([region ID,region name,Weibo creation time,Weibo ID],1)
        String name = val[val.length - 2] + SEPARATOR + val[val.length - 1]
                + SEPARATOR + val[0] + SEPARATOR + val[2];
        outputKey.set(name);
        context.write(outputKey, outputValue);
    }
}
Reducer类的创建
/**
* Copyright @ 2018 Truemen Tech Co. Ltd.
* All rights reserved.
* @author: rk
* date: 2018-02-22
*
*/
package com.weibo.report.module;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer of the regional-analysis job: adds up all values received for a key and
 * writes the key together with that total.
 *
 * @author rk
 */
public class RegionalAnalysisReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    /** Output holder reused for every key. */
    LongWritable outputValue = new LongWritable();

    /**
     * Writes {@code (key, sum of value)} to the context.
     *
     * @param key     the grouping key, passed through unchanged
     * @param value   the values grouped under {@code key}
     * @param context task context that receives the output pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> value, Context context)
            throws IOException, InterruptedException {
        outputValue.set(total(value));
        context.write(key, outputValue);
    }

    /** Returns the sum of all elements of {@code counts}; 0 when there are none. */
    private static long total(Iterable<LongWritable> counts) {
        long running = 0L;
        for (LongWritable count : counts) {
            running = running + count.get();
        }
        return running;
    }
}
利用注解的方式进行测试
package com.test.report.test;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;
import com.weibo.report.module.RegionalAnalysisMapper;
import com.weibo.report.module.RegionalAnalysisReducer;
/**
 * MRUnit tests for {@link RegionalAnalysisMapper} and {@link RegionalAnalysisReducer}.
 *
 * <p>All drivers are fully parameterized so the compiler checks that the key/value
 * types used in the tests match the ones declared by the mapper and the reducer.
 */
public class RegionTest {

    private Mapper<LongWritable, Text, Text, LongWritable> mapper;
    private Reducer<Text, LongWritable, Text, LongWritable> reducer;
    private MapDriver<LongWritable, Text, Text, LongWritable> mapDriver;
    private ReduceDriver<Text, LongWritable, Text, LongWritable> reducerDriver;

    /** Creates a fresh mapper, reducer and their drivers before every test. */
    @Before
    public void init() {
        mapper = new RegionalAnalysisMapper();
        mapDriver = new MapDriver<>(mapper);
        reducer = new RegionalAnalysisReducer();
        reducerDriver = new ReduceDriver<>(reducer);
    }

    /**
     * A well-formed line must produce the key built from its second-to-last, last,
     * first and third fields, with the value 1.
     */
    @Test
    public void testmap() throws IOException {
        String line = "20150311,23,11,1302347,tahenlan,北京市,true,false,38,10,29,1,001,zx34890,xiaoxiao,xiaoli,1,3,北京";
        Text value = new Text(line);
        LongWritable key = new LongWritable(1);
        mapDriver.withInput(key, value)
                .withOutput(new Text("3,北京,20150311,11"), new LongWritable(1L))
                .runTest();
    }

    /** Three values of 1 for the same key must be reduced to a single value of 3. */
    @Test
    public void testReduce() throws IOException {
        Text key = new Text("3,北京,20150311,11");
        ArrayList<LongWritable> values = new ArrayList<>();
        values.add(new LongWritable(1));
        values.add(new LongWritable(1));
        values.add(new LongWritable(1));
        reducerDriver.withInput(key, values)
                .withOutput(key, new LongWritable(3))
                .runTest();
    }
}