1.共享单车之数据存储
1.1获取工作簿中的数据
package com.educoder.savedata;
import java.io.InputStream;
import java.text.DecimalFormat;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
public class SaveWookbook {
	// NOTE(review): class name is misspelled ("Wookbook"); kept because the file
	// name and any launchers are bound to it.

	/**
	 * Reads the shared-bicycle workbook "data.xls" from the classpath and prints
	 * one summary line per data row: trip id, start time (epoch millis),
	 * bicycle id, and stop longitude.
	 *
	 * @param args unused
	 * @throws Exception if the workbook resource cannot be opened or parsed
	 */
	public static void main(String[] args) throws Exception {
		InputStream resourceAsStream = SaveWookbook.class.getClassLoader().getResourceAsStream("data.xls");
		// try-with-resources: Workbook is Closeable, so the underlying stream is
		// released even if a row fails (the original leaked both).
		try (Workbook workbook = WorkbookFactory.create(resourceAsStream)) {
			Sheet sheet = workbook.getSheetAt(0);
			int rows = sheet.getPhysicalNumberOfRows();
			// Loop-invariant formatters, created once instead of per row.
			DecimalFormat idFormat = new DecimalFormat("########");
			DecimalFormat coordFormat = new DecimalFormat("###.######");
			// NOTE(review): SaveData parses the same kind of time cells with
			// "MM/dd/yyyy HH:mm" (with a space). The original "MM/dd/yyyyHH:mm"
			// would make every parse throw and get swallowed silently, printing
			// nothing. Aligned with SaveData — confirm against the actual cell
			// contents of data.xls.
			FastDateFormat timeFormat = FastDateFormat.getInstance("MM/dd/yyyy HH:mm");
			for (int n = 1; n < rows; n++) { // row 0 is the header
				Row row = sheet.getRow(n);
				try {
					String trip_id = idFormat.format(row.getCell(0).getNumericCellValue());
					long begintime = timeFormat.parse(row.getCell(1).getStringCellValue()).getTime();
					int car_id = (int) row.getCell(3).getNumericCellValue();
					// Cell 9 holds the STOP longitude (see SaveData's column layout).
					String longitude = coordFormat.format(row.getCell(9).getNumericCellValue());
					System.out.println("骑行id:" + trip_id + ",开始时间:" + begintime + ",车辆id:" + car_id + ",结束经度:" + longitude);
				} catch (Exception e) {
					// A malformed row should not abort the dump, but it must not
					// vanish silently either (the original had an empty catch).
					System.err.println("skip row " + n + ": " + e);
				}
			}
		}
	}
}
1.2 保存共享单车数据
package com.educoder.savedata;
import java.io.InputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import com.educoder.util.HBaseUtil;
public class SaveData {

	/**
	 * Reads every data row of the classpath workbook "dataResources.xls" and
	 * bulk-loads it into the HBase table {@code t_shared_bicycle} (column
	 * family {@code info}), one Put per trip, row-keyed by the trip id.
	 *
	 * Column layout (0-based): 0 trip id, 1 begin time, 2 end time,
	 * 3 bicycle id, 4 departure, 5 destination, 6 city, 7 start longitude,
	 * 8 start latitude, 9 stop longitude, 10 stop latitude.
	 *
	 * @throws Exception if the table cannot be created or the workbook read
	 */
	public static void SaveBicycleData() throws Exception {
		HBaseUtil.createTable("t_shared_bicycle", "info");
		InputStream resourceAsStream = SaveData.class.getClassLoader().getResourceAsStream("dataResources.xls");
		List<Put> puts = new ArrayList<Put>();
		// Loop-invariant formatters: create once instead of once per row.
		DecimalFormat idFormat = new DecimalFormat("########");
		DecimalFormat coordFormat = new DecimalFormat("###.######");
		FastDateFormat timeFormat = FastDateFormat.getInstance("MM/dd/yyyy HH:mm");
		byte[] family = Bytes.toBytes("info");
		// try-with-resources: the original never closed the workbook/stream.
		try (Workbook workbook = WorkbookFactory.create(resourceAsStream)) {
			Sheet sheet = workbook.getSheetAt(0);
			int rows = sheet.getPhysicalNumberOfRows();
			for (int n = 1; n < rows; n++) { // row 0 is the header
				try {
					Row row = sheet.getRow(n);
					String departure = row.getCell(4).getStringCellValue();
					String destination = row.getCell(5).getStringCellValue();
					String city = row.getCell(6).getStringCellValue();
					// Same filter as the original: drop rows whose destination
					// equals the city or whose departure equals the destination.
					// Checked up front so rejected rows don't build a full Put
					// first (the original discarded an already-populated Put).
					if (destination.equals(city) || departure.equals(destination)) {
						continue;
					}
					String trip_id = idFormat.format(row.getCell(0).getNumericCellValue());
					Put put = new Put(Bytes.toBytes(trip_id));
					// Times are stored as decimal-string epoch millis.
					put.addColumn(family, Bytes.toBytes("beginTime"),
							Bytes.toBytes(String.valueOf(timeFormat.parse(row.getCell(1).getStringCellValue()).getTime())));
					put.addColumn(family, Bytes.toBytes("endTime"),
							Bytes.toBytes(String.valueOf(timeFormat.parse(row.getCell(2).getStringCellValue()).getTime())));
					int bicycleId = (int) row.getCell(3).getNumericCellValue();
					put.addColumn(family, Bytes.toBytes("bicycleId"), Bytes.toBytes(String.valueOf(bicycleId)));
					put.addColumn(family, Bytes.toBytes("departure"), Bytes.toBytes(departure));
					put.addColumn(family, Bytes.toBytes("destination"), Bytes.toBytes(destination));
					put.addColumn(family, Bytes.toBytes("city"), Bytes.toBytes(city));
					// Coordinates are kept to at most six decimal places.
					put.addColumn(family, Bytes.toBytes("start_longitude"),
							Bytes.toBytes(coordFormat.format(row.getCell(7).getNumericCellValue())));
					put.addColumn(family, Bytes.toBytes("start_latitude"),
							Bytes.toBytes(coordFormat.format(row.getCell(8).getNumericCellValue())));
					put.addColumn(family, Bytes.toBytes("stop_longitude"),
							Bytes.toBytes(coordFormat.format(row.getCell(9).getNumericCellValue())));
					put.addColumn(family, Bytes.toBytes("stop_latitude"),
							Bytes.toBytes(coordFormat.format(row.getCell(10).getNumericCellValue())));
					puts.add(put);
				} catch (Exception e) {
					// Malformed rows are skipped, but no longer silently.
					System.err.println("skip row " + n + ": " + e);
				}
			}
		}
		HBaseUtil.putByTable("t_shared_bicycle", puts);
	}
}
2.共享单车之数据分析
2.1统计共享单车每天的平均使用时间
package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import java.text.ParseException;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.math.RoundingMode;
import java.math.BigDecimal;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
public class AveragetTimeMapReduce extends Configured implements Tool {
	// NOTE(review): class name has a typo ("Averaget"); kept because launchers
	// reference it by this name.

	/** Column family of the shared-bicycle table. */
	public static final byte[] family = "info".getBytes();

	/**
	 * Emits one record per trip under the single key "avgTime", valued
	 * "yyyy-MM-dd_useTimeMillis" (the trip's day, taken from its start
	 * timestamp, plus its duration).
	 */
	public static class MyMapper extends TableMapper<Text, BytesWritable> {
		@Override
		protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
				throws IOException, InterruptedException {
			long beginTime = Long.parseLong(Bytes.toString(result.getValue(family, "beginTime".getBytes())));
			long endTime = Long.parseLong(Bytes.toString(result.getValue(family, "endTime".getBytes())));
			String day = DateFormatUtils.format(beginTime, "yyyy-MM-dd", Locale.CHINA);
			long useTime = endTime - beginTime;
			context.write(new Text("avgTime"), new BytesWritable(Bytes.toBytes(day + "_" + useTime)));
		}
	}

	/**
	 * Sums ride time per day, then writes the across-days average of those
	 * daily totals (converted to seconds, 2 decimals, HALF_DOWN) into column
	 * "avgTime" of the target table, row-keyed "avgTime".
	 */
	public static class MyTableReducer extends TableReducer<Text, BytesWritable, ImmutableBytesWritable> {
		@Override
		public void reduce(Text key, Iterable<BytesWritable> values, Context context)
				throws IOException, InterruptedException {
			// Total ride milliseconds accumulated per day.
			Map<String, Long> perDayTotals = new HashMap<String, Long>();
			for (BytesWritable value : values) {
				String[] dayAndUse = Bytes.toString(value.copyBytes()).split("_");
				long use = Long.parseLong(dayAndUse[1]);
				Long previous = perDayTotals.get(dayAndUse[0]);
				perDayTotals.put(dayAndUse[0], previous == null ? use : previous + use);
			}
			double sum = 0;
			int days = 0;
			for (Long dayTotal : perDayTotals.values()) {
				days++;
				sum += dayTotal;
			}
			// Arithmetic kept identical to the original (double division, then
			// new BigDecimal(double)) so the stored value does not change.
			BigDecimal avg = new BigDecimal(sum / days / 1000).setScale(2, RoundingMode.HALF_DOWN);
			Put put = new Put(Bytes.toBytes(key.toString()));
			put.addColumn(family, "avgTime".getBytes(), Bytes.toBytes(avg.toString()));
			context.write(null, put);
		}
	}

	public int run(String[] args) throws Exception {
		Configuration conf = HBaseUtil.conf;
		String sourceTable = "t_shared_bicycle";
		String targetTable = "t_bicycle_avgtime";
		try {
			HBaseUtil.createTable(targetTable, new String[] { "info" });
		} catch (Exception e) {
			// The target table may already exist from a previous run; not fatal.
			e.printStackTrace();
		}
		Job job = configureJob(conf, new String[] { sourceTable, targetTable });
		return job.waitForCompletion(true) ? 0 : 1;
	}

	/** Wires source table, scan, mapper, reducer and single-reducer setting. */
	private Job configureJob(Configuration conf, String[] args) throws IOException {
		String tablename = args[0];
		String targetTable = args[1];
		// Job.getInstance replaces the deprecated new Job(conf, name) constructor.
		Job job = Job.getInstance(conf, tablename);
		Scan scan = new Scan();
		scan.setCaching(300);       // fetch rows in larger batches
		scan.setCacheBlocks(false); // full scans should not pollute the block cache
		TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, BytesWritable.class, job);
		TableMapReduceUtil.initTableReducerJob(targetTable, MyTableReducer.class, job);
		job.setNumReduceTasks(1);
		return job;
	}
}
2.2统计共享单车在指定地点的每天平均次数
package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
public class AverageVehicleMapReduce extends Configured implements Tool {

	/** Column family of the shared-bicycle table. */
	public static final byte[] family = "info".getBytes();

	/**
	 * Emits the ride's day ("yyyy-MM-dd", from beginTime millis) under the
	 * fixed route key "河北省保定市雄县-韩庄村". Rows are pre-filtered by the Scan
	 * in {@link #configureJob}, so every mapped row belongs to that route.
	 */
	public static class MyMapper extends TableMapper<Text, BytesWritable> {
		@Override
		protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
				throws IOException, InterruptedException {
			String beginTime = Bytes.toString(result.getValue(family, "beginTime".getBytes()));
			String day = DateFormatUtils.format(Long.parseLong(beginTime), "yyyy-MM-dd", Locale.CHINA);
			context.write(new Text("河北省保定市雄县-韩庄村"), new BytesWritable(Bytes.toBytes(day)));
		}
	}

	/**
	 * Counts rides per day, then writes the across-days average ride count
	 * (2 decimals, HALF_DOWN) into column "avgNum" of the target table.
	 */
	public static class MyTableReducer extends TableReducer<Text, BytesWritable, ImmutableBytesWritable> {
		@Override
		public void reduce(Text key, Iterable<BytesWritable> values, Context context)
				throws IOException, InterruptedException {
			// Rides observed per day.
			Map<String, Integer> perDayCounts = new HashMap<String, Integer>();
			for (BytesWritable value : values) {
				String day = Bytes.toString(value.copyBytes());
				Integer current = perDayCounts.get(day);
				// Integer.valueOf over the deprecated new Integer(..) constructor.
				perDayCounts.put(day, current == null ? Integer.valueOf(1) : Integer.valueOf(current + 1));
			}
			double sum = 0;
			int days = 0;
			for (Integer count : perDayCounts.values()) {
				days++;
				sum += count;
			}
			BigDecimal avg = new BigDecimal(sum / days).setScale(2, RoundingMode.HALF_DOWN);
			Put put = new Put(Bytes.toBytes(key.toString()));
			put.addColumn(family, "avgNum".getBytes(), Bytes.toBytes(avg.toString()));
			context.write(null, put);
		}
	}

	public int run(String[] args) throws Exception {
		Configuration conf = HBaseUtil.conf;
		String sourceTable = "t_shared_bicycle";
		String targetTable = "t_bicycle_avgnum";
		try {
			HBaseUtil.createTable(targetTable, new String[] { "info" });
		} catch (Exception e) {
			// The target table may already exist; not fatal.
			e.printStackTrace();
		}
		Job job = configureJob(conf, new String[] { sourceTable, targetTable });
		return job.waitForCompletion(true) ? 0 : 1;
	}

	/**
	 * Builds the scan-filtered job: only rows whose departure equals
	 * "河北省保定市雄县" AND whose destination contains "韩庄村" are mapped.
	 */
	private Job configureJob(Configuration conf, String[] args) throws IOException {
		String tablename = args[0];
		String targetTable = args[1];
		// Job.getInstance replaces the deprecated new Job(conf, name) constructor.
		Job job = Job.getInstance(conf, tablename);
		Scan scan = new Scan();
		scan.setCaching(300);       // set once; the original set it twice
		scan.setCacheBlocks(false); // set once; the original set it twice
		ArrayList<Filter> listForFilters = new ArrayList<Filter>();
		listForFilters.add(new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("departure"),
				CompareOperator.EQUAL, Bytes.toBytes("河北省保定市雄县")));
		listForFilters.add(new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("destination"),
				CompareOperator.EQUAL, new SubstringComparator("韩庄村")));
		scan.setFilter(new FilterList(listForFilters));
		TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, BytesWritable.class, job);
		TableMapReduceUtil.initTableReducerJob(targetTable, MyTableReducer.class, job);
		job.setNumReduceTasks(1);
		return job;
	}
}
2.3 统计共享单车指定车辆每次使用的空闲平均时间
package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
public class FreeTimeMapReduce extends Configured implements Tool {

	/** Column family of the shared-bicycle table. */
	public static final byte[] family = "info".getBytes();

	/**
	 * Emits "beginMillis_endMillis" for every trip of bicycle 5996 (the Scan in
	 * {@link #configureJob} filters on bicycleId), all under the key "5996".
	 */
	public static class MyMapper extends TableMapper<Text, BytesWritable> {
		@Override
		protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
				throws IOException, InterruptedException {
			long beginTime = Long.parseLong(Bytes.toString(result.getValue(family, "beginTime".getBytes())));
			long endTime = Long.parseLong(Bytes.toString(result.getValue(family, "endTime".getBytes())));
			context.write(new Text("5996"), new BytesWritable(Bytes.toBytes(beginTime + "_" + endTime)));
		}
	}

	/**
	 * Averages the idle gap between consecutive trips of the bicycle and writes
	 * it, in hours with 2 decimals (HALF_DOWN), to column "freeTime".
	 */
	public static class MyTableReducer extends TableReducer<Text, BytesWritable, ImmutableBytesWritable> {
		@Override
		public void reduce(Text key, Iterable<BytesWritable> values, Context context)
				throws IOException, InterruptedException {
			long freeTime = 0;
			long previousBegin = 0;
			int gaps = 0;
			for (BytesWritable time : values) {
				String[] beginAndEnd = Bytes.toString(time.copyBytes()).split("_");
				if (previousBegin == 0) {
					// First record only seeds the running begin time.
					previousBegin = Long.parseLong(beginAndEnd[0]);
					continue;
				}
				// NOTE(review): gap = previous record's begin - this record's end.
				// That only measures idle time if records arrive ordered
				// latest-first — confirm the row ordering of the source table.
				freeTime += previousBegin - Long.parseLong(beginAndEnd[1]);
				previousBegin = Long.parseLong(beginAndEnd[0]);
				gaps++;
			}
			if (gaps == 0) {
				// A single trip yields no idle interval; the original divided by
				// zero here. Nothing meaningful to write.
				return;
			}
			// BUG FIX: the original computed freeTime / gaps / 1000 / 60 / 60 in
			// long arithmetic, truncating to whole hours before setScale(2) could
			// keep any fraction. Divide once in BigDecimal instead.
			BigDecimal avgHours = BigDecimal.valueOf(freeTime)
					.divide(BigDecimal.valueOf(gaps * 3600000L), 2, RoundingMode.HALF_DOWN);
			Put put = new Put(Bytes.toBytes(key.toString()));
			put.addColumn(family, "freeTime".getBytes(), Bytes.toBytes(avgHours.toString()));
			context.write(null, put);
		}
	}

	public int run(String[] args) throws Exception {
		Configuration conf = HBaseUtil.conf;
		String sourceTable = "t_shared_bicycle";
		String targetTable = "t_bicycle_freetime";
		try {
			HBaseUtil.createTable(targetTable, new String[] { "info" });
		} catch (Exception e) {
			// The target table may already exist; not fatal.
			e.printStackTrace();
		}
		Job job = configureJob(conf, new String[] { sourceTable, targetTable });
		return job.waitForCompletion(true) ? 0 : 1;
	}

	/** Restricts the scan to rows whose bicycleId equals "5996". */
	private Job configureJob(Configuration conf, String[] args) throws IOException {
		String tablename = args[0];
		String targetTable = args[1];
		// Job.getInstance replaces the deprecated new Job(conf, name) constructor.
		Job job = Job.getInstance(conf, tablename);
		Scan scan = new Scan();
		scan.setCaching(300);
		scan.setCacheBlocks(false);
		Filter filter = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("bicycleId"),
				CompareOperator.EQUAL, Bytes.toBytes("5996"));
		scan.setFilter(filter);
		TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, BytesWritable.class, job);
		TableMapReduceUtil.initTableReducerJob(targetTable, MyTableReducer.class, job);
		job.setNumReduceTasks(1);
		return job;
	}
}
2.4 统计指定时间共享单车使用次数
package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
public class UsageRateMapReduce extends Configured implements Tool {
public static final byte[] family = "info".getBytes();
public static class MyMapper extends TableMapper<Text, IntWritable> {
protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
throws IOException, InterruptedException {
IntWritable doubleWritable = new IntWritable(1);
context.write(new Text("departure"), doubleWritable);
}
}
public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int totalNum = 0;
for (IntWritable num : values) {
int d = num.get();
totalNum += d;
}
Put put = new Put(Bytes.toBytes(key.toString()));
put.addColumn(family, "usageRate".getBytes(), Bytes.toBytes(String.valueOf(totalNum)));
context.write(null, put);
}
}
public int run(String[] args) throws Exception {
Configuration conf = HBaseUtil.conf;
String arg1 = "t_shared_bicycle";
String arg2 = "t_bicycle_usagerate";
try {
HBaseUtil.createTable(arg2, new String[] { "info" });
} catch (Exception e) {
e.printStackTrace();
}
Job job = configureJob(conf, new String[] { arg1, arg2 });
return job.waitForCompletion(true) ? 0 : 1;
}
private Job configureJob(Configuration conf, String[] args) throws IOException {
String tablename = args[0];
String targetTable = args[1];
Job job = new Job(conf, tablename);
ArrayList<Filter> listForFilters = new ArrayList<Filter>();
FastDateFormat instance = FastDateFormat.getInstance("yyyy-MM-dd");
Scan scan = new Scan();
scan.setCaching(300);
scan.setCacheBlocks(false);
try {
Filter destinationFilter = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("beginTime"),
CompareOperator.GREATER_OR_EQUAL, Bytes.toBytes(String.valueOf(instance.parse("2017-08-01").getTime())));
Filter departure = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("endTime"),
CompareOperator.LESS_OR_EQUAL, Bytes.toBytes(String.valueOf(instance.parse("2017-09-01").getTime())));
listForFilters.add(departure);
listForFilters.add(destinationFilter);
}
catch (Exception e) {
e.printStackTrace();
return null;
}
Filter filters = new FilterList(listForFilters);
scan.setFilter(filters);
TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, IntWritable.class, job);
TableMapReduceUtil.initTableReducerJob(targetTable,
MyTableReducer.class,
job);
job.setNumReduceTasks(1);
return job;
}
}
2.5 统计共享单车线路流量
package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
public class LineTotalMapReduce extends Configured implements Tool {
public static final byte[] family = "info".getBytes();
public static class MyMapper extends TableMapper<Text, IntWritable> {
protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
throws IOException, InterruptedException {
String start_latitude = Bytes.toString(result.getValue(family, "start_latitude".getBytes()));
String start_longitude = Bytes.toString(result.getValue(family, "start_longitude".getBytes()));
String stop_latitude = Bytes.toString(result.getValue(family, "stop_latitude".getBytes()));
String stop_longitude = Bytes.toString(result.getValue(family, "stop_longitude".getBytes()));
String departure = Bytes.toString(result.getValue(family, "departure".getBytes()));
String destination = Bytes.toString(result.getValue(family, "destination".getBytes()));
IntWritable doubleWritable = new IntWritable(1);
context.write(new Text(start_latitude + "-" + start_longitude + "_" + stop_latitude + "-" + stop_longitude
+ "_" + departure + "-" + destination), doubleWritable);
}
}
public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int totalNum = 0;
for (IntWritable num : values) {
int d = num.get();
totalNum += d;
}
Put put = new Put(Bytes.toBytes(key.toString() + totalNum ));
put.addColumn(family, "lineTotal".getBytes(), Bytes.toBytes(String.valueOf(totalNum)));
context.write(null, put);
}
}
public int run(String[] args) throws Exception {
Configuration conf = HBaseUtil.conf;
String arg1 = "t_shared_bicycle";
String arg2 = "t_bicycle_linetotal";
try {
HBaseUtil.createTable(arg2, new String[] { "info" });
} catch (Exception e) {
e.printStackTrace();
}
Job job = configureJob(conf, new String[] { arg1, arg2 });
return job.waitForCompletion(true) ? 0 : 1;
}
private Job configureJob(Configuration conf, String[] args) throws IOException {
String tablename = args[0];
String targetTable = args[1];
Job job = new Job(conf, tablename);
Scan scan = new Scan();
scan.setCaching(300);
scan.setCacheBlocks(false);
TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, IntWritable.class, job);
TableMapReduceUtil.initTableReducerJob(targetTable,
MyTableReducer.class,
job);
job.setNumReduceTasks(1);
return job;
}
}
3.共享单车之数据可视化
3.1绘制地图
<%@ page language="java" contentType="text/html; charset=utf-8"
	pageEncoding="utf-8"%>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<style type="text/css">
/* Make the map container fill the whole viewport. */
body, html, #allmap {
	width: 100%;
	height: 100%;
	overflow: hidden;
	margin: 0;
}
</style>
<script type="text/javascript" src="http://api.map.baidu.com/api?v=1.4"></script>
<title>step1</title>
</head>
<body>
	<div id="allmap"></div>
</body>
</html>
<script type="text/javascript">
	// Create the map, centre it and enable basic navigation controls.
	var map = new BMap.Map("allmap");
	var point = new BMap.Point(116.10, 38.98);
	map.centerAndZoom(point, 13);
	map.addControl(new BMap.NavigationControl());
	map.enableScrollWheelZoom();
	// One sample ride: start / stop coordinates plus the two place labels.
	var start_longitude = 116.233093;
	var start_latitude = 39.04607;
	var stop_longitude = 116.235352;
	var stop_latitude = 39.041691;
	var address = ["乡里乡情铁锅炖南228米", "擎天矿用材料有限公司北609米"];
	// BUG FIX: the original fragment never closed this script element.
</script>
<%@ page language="java" contentType="text/html; charset=utf-8"
	pageEncoding="utf-8"%>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<style type="text/css">
/* Make the map container fill the whole viewport. */
body, html, #allmap {
	width: 100%;
	height: 100%;
	overflow: hidden;
	margin: 0;
}
</style>
<script type="text/javascript" src="http://api.map.baidu.com/api?v=1.4"></script>
<title>step1</title>
</head>
<body>
	<div id="allmap"></div>
</body>
</html>
<script type="text/javascript">
	// Create the map, centre it and enable basic navigation controls.
	var map = new BMap.Map("allmap");
	var point = new BMap.Point(116.10, 38.98);
	map.centerAndZoom(point, 13);
	map.addControl(new BMap.NavigationControl());
	map.enableScrollWheelZoom();
	// One sample ride: start / stop coordinates plus the two place labels.
	var start_longitude = 116.233093;
	var start_latitude = 39.04607;
	var stop_longitude = 116.235352;
	var stop_latitude = 39.041691;
	var address = ["乡里乡情铁锅炖南228米", "擎天矿用材料有限公司北609米"];
	// Draw the ride as a red polyline and decorate it with arrow heads.
	var polyline = new BMap.Polyline([
			new BMap.Point(start_longitude, start_latitude),
			new BMap.Point(stop_longitude, stop_latitude)
		], {strokeColor: "red", strokeWeight: 3, strokeOpacity: 0.5});
	map.addOverlay(polyline);
	addArrow(polyline, 10, Math.PI / 7);
	// Start-point marker with its text label. (BUG FIX: these explanatory
	// phrases were bare Chinese text inside the script in the original,
	// which is a JavaScript syntax error.)
	var marker = new BMap.Marker(new BMap.Point(start_longitude, start_latitude));
	var label = new BMap.Label(address[0], {offset: new BMap.Size(20, 0)});
	marker.setLabel(label);
	map.addOverlay(marker);
	// Stop-point marker with its text label.
	var marker = new BMap.Marker(new BMap.Point(stop_longitude, stop_latitude));
	var label = new BMap.Label(address[1], {offset: new BMap.Size(20, 0)});
	marker.setLabel(label);
	map.addOverlay(marker);

	// Draws a small arrow head at the end of every segment of `polyline`.
	// r: arrow size in pixels; angle: half-opening angle of the head.
	function addArrow(polyline, r, angle) {
		var linePoint = polyline.getPath();
		var arrowCount = linePoint.length;
		var arrow = null;
		for (var i = 1; i < arrowCount; i++) {
			var pixelStart = map.pointToPixel(linePoint[i - 1]);
			var pixelEnd = map.pointToPixel(linePoint[i]);
			var pixelTemX, pixelTemY;
			var pixelX, pixelY, pixelX1, pixelY1;
			// Slope of the segment in pixel space.
			var delta = (pixelEnd.y - pixelStart.y) / (pixelEnd.x - pixelStart.x);
			var param = Math.sqrt(delta * delta + 1);
			// Step back r pixels from the segment end towards its start.
			if ((pixelEnd.x - pixelStart.x) < 0) {
				pixelTemX = pixelEnd.x + r / param;
				pixelTemY = pixelEnd.y + delta * r / param;
			} else {
				pixelTemX = pixelEnd.x - r / param;
				pixelTemY = pixelEnd.y - delta * r / param;
			}
			// The two wing points of the arrow head.
			pixelX = pixelTemX + Math.tan(angle) * r * delta / param;
			pixelY = pixelTemY - Math.tan(angle) * r / param;
			pixelX1 = pixelTemX - Math.tan(angle) * r * delta / param;
			pixelY1 = pixelTemY + Math.tan(angle) * r / param;
			var pointArrow = map.pixelToPoint(new BMap.Pixel(pixelX, pixelY));
			var pointArrow1 = map.pixelToPoint(new BMap.Pixel(pixelX1, pixelY1));
			arrow = new BMap.Polyline([pointArrow, linePoint[i], pointArrow1],
					{strokeColor: "blue", strokeWeight: 3, strokeOpacity: 0.5});
			map.addOverlay(arrow);
			// BUG FIX: the original returned inside the loop, so only the first
			// segment of a multi-segment polyline ever got an arrow head.
		}
		return arrow;
	}
</script>
// Draws a small arrow head at the end of every segment of `polyline`.
// r: arrow size in pixels; angle: half-opening angle of the head.
// Relies on the page-level `map` (BMap.Map) for pixel<->point conversion.
function addArrow(polyline, r, angle) {
	var linePoint = polyline.getPath();
	var arrowCount = linePoint.length;
	var arrow = null;
	for (var i = 1; i < arrowCount; i++) {
		var pixelStart = map.pointToPixel(linePoint[i - 1]);
		var pixelEnd = map.pointToPixel(linePoint[i]);
		var pixelTemX, pixelTemY;
		var pixelX, pixelY, pixelX1, pixelY1;
		// Slope of the segment in pixel space.
		var delta = (pixelEnd.y - pixelStart.y) / (pixelEnd.x - pixelStart.x);
		var param = Math.sqrt(delta * delta + 1);
		// Step back r pixels from the segment end towards its start.
		if ((pixelEnd.x - pixelStart.x) < 0) {
			pixelTemX = pixelEnd.x + r / param;
			pixelTemY = pixelEnd.y + delta * r / param;
		} else {
			pixelTemX = pixelEnd.x - r / param;
			pixelTemY = pixelEnd.y - delta * r / param;
		}
		// The two wing points of the arrow head.
		pixelX = pixelTemX + Math.tan(angle) * r * delta / param;
		pixelY = pixelTemY - Math.tan(angle) * r / param;
		pixelX1 = pixelTemX - Math.tan(angle) * r * delta / param;
		pixelY1 = pixelTemY + Math.tan(angle) * r / param;
		var pointArrow = map.pixelToPoint(new BMap.Pixel(pixelX, pixelY));
		var pointArrow1 = map.pixelToPoint(new BMap.Pixel(pixelX1, pixelY1));
		arrow = new BMap.Polyline([pointArrow, linePoint[i], pointArrow1],
				{strokeColor: "blue", strokeWeight: 3, strokeOpacity: 0.5});
		map.addOverlay(arrow);
		// BUG FIX: the original returned inside the loop, so only the first
		// segment of a multi-segment polyline ever got an arrow head.
	}
	return arrow;
}
</script>
3.2绘制流量最高的五条线路的路程图
package com.educoder.servlet;
import com.educoder.util.HBaseUtil;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
public class BickMapServlet extends HttpServlet {

	/**
	 * Renders the bike-route map page: loads the pre-aggregated line data via
	 * {@code HBaseUtil.scanTable()}, copies every entry into a request
	 * attribute of the same name, and forwards to step2.jsp.
	 */
	@Override
	protected void doGet(HttpServletRequest request, HttpServletResponse response)
			throws ServletException, IOException {
		request.setCharacterEncoding("utf-8");
		response.setContentType("text/html;charset=utf-8");
		final BickMap lineData = HBaseUtil.scanTable();
		for (final String attributeName : lineData.getKeys()) {
			request.setAttribute(attributeName, lineData.get(attributeName));
		}
		request.getRequestDispatcher("step2.jsp").forward(request, response);
	}

	/** POST is handled exactly like GET. */
	@Override
	protected void doPost(HttpServletRequest req, HttpServletResponse resp)
			throws ServletException, IOException {
		doGet(req, resp);
	}
}
4.共享单车之租赁需求预估
4.1数据探索与可视化
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
#********* Begin *********#
# pandas / pyplot are already imported above; the original re-imported both
# here redundantly.
train_df = pd.read_csv('./step1/bike_train.csv')
# "datetime" looks like "2011-01-01 05:00:00": take the HH part as an int hour.
train_df['hour'] = train_df.datetime.apply(lambda x: x.split()[1].split(':')[0]).astype('int')
# Average count / registered / casual per hour-of-day.
group_hour = train_df.groupby(train_df.hour)
hour_mean = group_hour[['count', 'registered', 'casual']].mean()
# Plot only the overall count curve and save it as a PNG (Agg backend: no GUI).
fig = plt.figure(figsize=(10, 10))
plt.plot(hour_mean['count'])
plt.title('average count per hour')
plt.savefig('./step1/result/plot.png')
#********* End *********#
4.2特征工程
import pandas as pd
import numpy as np
from datetime import datetime
def transform_data(train_df):
    '''
    Split the "YYYY-MM-DD HH:MM:SS" strings in train_df.datetime into
    separate date / hour / year / month / weekday columns, added in place.

    :param train_df: DataFrame read from bike_train.csv; must contain a
        "datetime" column of "YYYY-MM-DD HH:MM:SS" strings
    :return: the same DataFrame, with the five new columns added
        (the original docstring wrongly said it returns nothing)
    '''
    #********* Begin *********#
    # "2011-01-01 05:00:00" -> date "2011-01-01", hour 5, year 2011, month 1
    train_df['date'] = train_df.datetime.apply(lambda x: x.split()[0])
    train_df['hour'] = train_df.datetime.apply(lambda x: x.split()[1].split(':')[0]).astype('int')
    train_df['year'] = train_df.datetime.apply(lambda x: x.split()[0].split('-')[0]).astype('int')
    train_df['month'] = train_df.datetime.apply(lambda x: x.split()[0].split('-')[1]).astype('int')
    # ISO weekday: Monday=1 ... Sunday=7
    train_df['weekday'] = train_df.date.apply(lambda x: datetime.strptime(x, '%Y-%m-%d').isoweekday())
    return train_df
    #********* End **********#
4.3租赁需求预估
#********* Begin *********#
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.linear_model import Ridge

train_df = pd.read_csv('./step3/bike_train.csv')
# Drop outlier rows: keep count within 3 standard deviations of its mean.
train_df = train_df[np.abs(train_df['count'] - train_df['count'].mean()) <= 3 * train_df['count'].std()]
# Split the "YYYY-MM-DD HH:MM:SS" datetime strings into numeric features.
train_df['date'] = train_df.datetime.apply(lambda x: x.split()[0])
train_df['hour'] = train_df.datetime.apply(lambda x: x.split()[1].split(':')[0]).astype('int')
train_df['year'] = train_df.datetime.apply(lambda x: x.split()[0].split('-')[0]).astype('int')
train_df['month'] = train_df.datetime.apply(lambda x: x.split()[0].split('-')[1]).astype('int')
train_df['weekday'] = train_df.date.apply(lambda x: datetime.strptime(x, '%Y-%m-%d').isoweekday())
# One-hot encode the categorical features. (The original assigned
# train_df_back = train_df first — a dead assignment overwritten by the
# concat below; removed.)
dummies_month = pd.get_dummies(train_df['month'], prefix='month')
dummies_year = pd.get_dummies(train_df['year'], prefix='year')
dummies_season = pd.get_dummies(train_df['season'], prefix='season')
dummies_weather = pd.get_dummies(train_df['weather'], prefix='weather')
train_df_back = pd.concat([train_df, dummies_month, dummies_year, dummies_season, dummies_weather], axis=1)
train_label = train_df_back['count']
# Drop the raw/duplicated columns from the feature matrix.
# NOTE(review): this assumes train and test end up with identical feature
# columns (same dummy categories, no train-only columns) — verify against
# the actual bike_train.csv / bike_test.csv schemas.
train_df_back = train_df_back.drop(['datetime', 'season', 'weather', 'atemp', 'date', 'month', 'count'], axis=1)

test_df = pd.read_csv('./step3/bike_test.csv')
# Identical time-feature processing for the test set.
test_df['date'] = test_df.datetime.apply(lambda x: x.split()[0])
test_df['hour'] = test_df.datetime.apply(lambda x: x.split()[1].split(':')[0]).astype('int')
test_df['year'] = test_df.datetime.apply(lambda x: x.split()[0].split('-')[0]).astype('int')
test_df['month'] = test_df.datetime.apply(lambda x: x.split()[0].split('-')[1]).astype('int')
test_df['weekday'] = test_df.date.apply(lambda x: datetime.strptime(x, '%Y-%m-%d').isoweekday())
# One-hot encode the test set the same way (dead test_df_back = test_df removed).
dummies_month = pd.get_dummies(test_df['month'], prefix='month')
dummies_year = pd.get_dummies(test_df['year'], prefix='year')
dummies_season = pd.get_dummies(test_df['season'], prefix='season')
dummies_weather = pd.get_dummies(test_df['weather'], prefix='weather')
test_df_back = pd.concat([test_df, dummies_month, dummies_year, dummies_season, dummies_weather], axis=1)
test_df_back = test_df_back.drop(['datetime', 'season', 'weather', 'atemp', 'date', 'month'], axis=1)

# Fit a ridge regression on the training features and predict test demand.
clf = Ridge(alpha=1.0)
clf.fit(train_df_back, train_label)
count = clf.predict(test_df_back)
# Save predictions alongside their timestamps.
result = pd.DataFrame({'datetime': test_df['datetime'], 'count': count})
result.to_csv('./step3/result.csv', index=False)
#********* End *********#