第1关:统计共享单车每天的平均使用时间
任务描述
本关任务:使用 HBase 的 MapReduce 对已存储在 HBase 中的共享单车运行数据进行分析,统计共享单车每天的平均使用时间。共享单车运行数据存放在 HBase 的 t_shared_bicycle 表中(表结构可在编程要求中查看)。
相关知识
为了完成本关任务,你需要掌握:
如何配置Hbase的MapReduce类;
如何使用Hbase的MapReduce进行数据分析。
package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import java.text.ParseException;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.math.RoundingMode;
import java.math.BigDecimal;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
/**
 * MapReduce job that computes the average time shared bicycles are used per day.
 *
 * <p>The mapper scans the source HBase table, reading the {@code info:beginTime} and
 * {@code info:endTime} cells of every row. The single reducer adds up the usage per
 * calendar day, averages those daily totals over the number of distinct days and
 * writes the result to row {@code avgTime}, column {@code info:avgTime} of the
 * target table.
 */
public class AveragetTimeMapReduce extends Configured implements Tool {

    /** Column family used for both the cells that are read and the cell that is written. */
    public static final byte[] family = Bytes.toBytes("info");

    /** Counter group under which rows skipped by the mapper are reported. */
    private static final String COUNTER_GROUP = "AveragetTimeMapReduce";

    /**
     * Emits one {@code <day>_<useTime>} record per ride, all under the constant key
     * {@code avgTime} so that a single reduce call sees every ride.
     */
    public static class MyMapper extends TableMapper<Text, BytesWritable> {

        /**
         * Converts one table row into a {@code yyyy-MM-dd_<endTime - beginTime>} record.
         *
         * <p>Rows whose begin/end cells are missing or are not decimal numbers are
         * skipped and counted instead of failing the whole job.
         *
         * @param rowKey  row key of the scanned row (unused)
         * @param result  cells of the scanned row
         * @param context MapReduce context the record is written to
         * @throws IOException          if writing the record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
                throws IOException, InterruptedException {
            /********** Begin *********/
            // Bytes.toString returns null for a null array, i.e. for an absent cell.
            String beginText = Bytes.toString(result.getValue(family, Bytes.toBytes("beginTime")));
            String endText = Bytes.toString(result.getValue(family, Bytes.toBytes("endTime")));
            if (beginText == null || endText == null) {
                context.getCounter(COUNTER_GROUP, "ROWS_MISSING_TIME").increment(1);
                return;
            }

            long beginTime;
            long endTime;
            try {
                beginTime = Long.parseLong(beginText);
                endTime = Long.parseLong(endText);
            } catch (NumberFormatException e) {
                context.getCounter(COUNTER_GROUP, "ROWS_MALFORMED_TIME").increment(1);
                return;
            }

            // beginTime is handed to DateFormatUtils as epoch milliseconds; the day is
            // rendered in the JVM's default time zone.
            String day = DateFormatUtils.format(beginTime, "yyyy-MM-dd", Locale.CHINA);
            long useTime = endTime - beginTime;
            BytesWritable record = new BytesWritable(Bytes.toBytes(day + "_" + useTime));
            context.write(new Text("avgTime"), record);
            /********** End *********/
        }
    }

    /**
     * Aggregates the mapper records into one average and stores it in the target table.
     */
    public static class MyTableReducer extends TableReducer<Text, BytesWritable, ImmutableBytesWritable> {

        /**
         * Sums the usage per day, then writes (sum of daily totals / number of days / 1000)
         * rounded to two decimals.
         *
         * @param key     the constant key emitted by the mapper; used as the output row key
         * @param values  {@code <day>_<useTime>} records
         * @param context MapReduce context the resulting {@link Put} is written to
         * @throws IOException          if writing the result fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<BytesWritable> values, Context context)
                throws IOException, InterruptedException {
            /********** Begin *********/
            Map<String, Long> useTimeByDay = new HashMap<String, Long>();
            for (BytesWritable value : values) {
                // copyBytes() trims the backing array to the valid length before decoding.
                String[] parts = Bytes.toString(value.copyBytes()).split("_");
                String day = parts[0];
                long useTime = Long.parseLong(parts[1]);
                Long soFar = useTimeByDay.get(day);
                useTimeByDay.put(day, soFar == null ? useTime : soFar + useTime);
            }
            if (useTimeByDay.isEmpty()) {
                // Nothing to average; avoids dividing by zero and feeding NaN to BigDecimal.
                return;
            }

            double sum = 0;
            for (Long dayTotal : useTimeByDay.values()) {
                sum += dayTotal;
            }
            int days = useTimeByDay.size();

            // Dividing by 1000 converts milliseconds to seconds (the mapper treats the
            // timestamps as epoch milliseconds). The double constructor and HALF_DOWN are
            // kept as-is so the stored value is unchanged for existing data.
            BigDecimal average = new BigDecimal(sum / days / 1000);
            BigDecimal rounded = average.setScale(2, RoundingMode.HALF_DOWN);

            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(family, Bytes.toBytes("avgTime"), Bytes.toBytes(rounded.toString()));
            // The output key is not used here; the Put already carries its row key.
            context.write(null, put);
            /********** End *********/
        }
    }

    /**
     * Creates the target table (best effort), then runs the job and waits for it.
     *
     * @param args ignored; source and target table names are fixed
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if the job cannot be configured or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        // Configure the job.
        Configuration conf = HBaseUtil.conf;
        String arg1 = "t_shared_bicycle";
        String arg2 = "t_bicycle_avgtime";
        try {
            HBaseUtil.createTable(arg2, new String[] { "info" });
        } catch (Exception e) {
            // Table creation failed. Deliberately best effort: the job is still
            // attempted (presumably the table may already exist -- verify against
            // HBaseUtil.createTable).
            e.printStackTrace();
        }
        Job job = configureJob(conf, new String[] { arg1, arg2 });
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Builds the job: table scan as input, {@link MyMapper}, {@link MyTableReducer},
     * and a single reduce task.
     *
     * @param conf configuration used to create the job
     * @param args {@code args[0]} is the source table, {@code args[1]} the target table
     * @return the configured, not yet submitted job
     * @throws IOException if the job cannot be created or initialised
     */
    private Job configureJob(Configuration conf, String[] args) throws IOException {
        String tablename = args[0];
        String targetTable = args[1];
        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, tablename);
        Scan scan = new Scan();
        scan.setCaching(300);
        // Never enable block caching for scans issued by a MapReduce job.
        scan.setCacheBlocks(false);
        // Initialise the map side.
        TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, BytesWritable.class, job);
        // Initialise the reduce side.
        TableMapReduceUtil.initTableReducerJob(targetTable, // output table
                MyTableReducer.class, // reducer class
                job);
        job.setNumReduceTasks(1);
        return job;
    }
}
第2关:统计共享单车在指定地点的每天平均次数
任务描述
本关任务:使用 HBase 的 MapReduce 对已存储在 HBase 中的共享单车运行数据进行分析,统计共享单车每天在指定地点的平均次数。共享单车运行数据存放在 HBase 的 t_shared_bicycle 表中(表结构可在编程要求中查看)。
相关知识
为了完成本关任务,你需要掌握:
如何配置Hbase的MapReduce类;
如何使用Hbase的MapReduce进行数据分析;
如何使用过滤器过滤读取到的数据。
p