头歌-共享单车实训全代码(粘贴复制即可)

1.共享单车之数据存储

1.1获取工作簿中的数据

package com.educoder.savedata;

import java.io.InputStream;
import java.text.DecimalFormat;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;


/**
 * Reads the {@code data.xls} workbook from the classpath and prints selected fields of every
 * row that can be read in the expected format.
 *
 * <p>Row 0 is skipped (the loop starts at index 1). A row is dropped silently when any of the
 * accessed cells is missing, has the wrong cell type, or holds an unparsable date.
 */
public class SaveWookbook {

    /**
     * Prints trip id, begin time (epoch milliseconds), vehicle id and the value of column 9 for
     * each valid row of the first sheet.
     *
     * @param args unused
     * @throws Exception if the workbook cannot be located or opened
     */
    public static void main(String[] args) throws Exception {
        /**********     Begin    **********/
        // The formatters do not depend on the row, so they are created once instead of per row.
        DecimalFormat tripIdFormat = new DecimalFormat("########");
        DecimalFormat coordinateFormat = new DecimalFormat("###.######");
        // NOTE(review): this pattern has no space between date and time, while SaveData parses
        // the same column with "MM/dd/yyyy HH:mm" -- confirm which one matches data.xls.
        FastDateFormat beginTimeFormat = FastDateFormat.getInstance("MM/dd/yyyyHH:mm");

        // 1. Load the workbook through the class loader; the stream is closed when done.
        try (InputStream resourceAsStream =
                SaveData.class.getClassLoader().getResourceAsStream("data.xls")) {
            if (resourceAsStream == null) {
                throw new IllegalStateException("data.xls was not found on the classpath");
            }
            Workbook workbook = WorkbookFactory.create(resourceAsStream);

            // 2. First sheet of the workbook.
            Sheet sheet = workbook.getSheetAt(0);

            // 3. Number of physically present rows in the sheet.
            int rows = sheet.getPhysicalNumberOfRows();

            // 4. Print every valid row; invalid rows are discarded via the catch below.
            for (int n = 1; n < rows; n++) {
                Row row = sheet.getRow(n);
                try {
                    String trip_id = tripIdFormat.format(row.getCell(0).getNumericCellValue());
                    // Begin time, converted to epoch milliseconds.
                    long begintime =
                            beginTimeFormat.parse(row.getCell(1).getStringCellValue()).getTime();
                    // Vehicle id.
                    int car_id = (int) row.getCell(3).getNumericCellValue();
                    // Column 9 is printed under the "结束经度" (stop longitude) label.
                    String longitude =
                            coordinateFormat.format(row.getCell(9).getNumericCellValue());
                    System.out.println("骑行id:" + trip_id + ",开始时间:" + begintime
                            + ",车辆id:" + car_id + ",结束经度:" + longitude);
                } catch (Exception ignored) {
                    // Deliberate best-effort: a row with missing cells, wrong cell types or an
                    // unparsable date is treated as invalid data and skipped without output.
                }
            }
        }
        /**********     End    **********/
    }
}

1.2 保存共享单车数据

package com.educoder.savedata;

import java.io.InputStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import com.educoder.util.HBaseUtil;

/**
 * Reads shared-bicycle trip records from the bundled workbook and stores them in HBase.
 */
public class SaveData {

    /** Column family that receives every column written by this class. */
    private static final byte[] FAMILY = Bytes.toBytes("info");

    /**
     * Creates table {@code t_shared_bicycle} (family {@code info}), converts every valid row of
     * the first sheet of {@code dataResources.xls} into a {@link Put} keyed by trip id, and
     * writes all of them with a single {@code HBaseUtil.putByTable} call.
     *
     * <p>Row 0 is skipped. A row is dropped when a cell is missing or malformed, when its
     * destination equals its city, or when its departure equals its destination.
     *
     * @throws Exception if the table cannot be created, the workbook cannot be located or
     *         opened, or the batch write fails
     */
    public static void SaveBicycleData() throws Exception {
        /******** **   Begin   ******* ***/
        HBaseUtil.createTable("t_shared_bicycle", "info");

        // Row-independent helpers are built once rather than once per row.
        DecimalFormat tripIdFormat = new DecimalFormat("########");
        DecimalFormat coordinateFormat = new DecimalFormat("###.######");
        FastDateFormat timeFormat = FastDateFormat.getInstance("MM/dd/yyyy HH:mm");

        List<Put> puts = new ArrayList<Put>();
        // try-with-resources closes the classpath stream even if reading fails.
        try (InputStream resourceAsStream =
                SaveData.class.getClassLoader().getResourceAsStream("dataResources.xls")) {
            if (resourceAsStream == null) {
                throw new IllegalStateException("dataResources.xls was not found on the classpath");
            }
            Workbook workbook = WorkbookFactory.create(resourceAsStream);
            Sheet sheet = workbook.getSheetAt(0);
            int rows = sheet.getPhysicalNumberOfRows();
            for (int n = 1; n < rows; n++) {
                try {
                    Put put = toPut(sheet.getRow(n), tripIdFormat, coordinateFormat, timeFormat);
                    if (put != null) {
                        puts.add(put);
                    }
                } catch (Exception ignored) {
                    // Deliberate best-effort: rows with missing cells, wrong cell types or
                    // unparsable dates are invalid data and are skipped.
                }
            }
        }
        HBaseUtil.putByTable("t_shared_bicycle", puts);
        /******* ***   End   ****** ****/
    }

    /**
     * Converts one sheet row into a {@link Put}, or returns {@code null} when the row is absent
     * or is filtered out as an invalid route.
     */
    private static Put toPut(Row row, DecimalFormat tripIdFormat, DecimalFormat coordinateFormat,
            FastDateFormat timeFormat) throws Exception {
        if (row == null) {
            return null;
        }
        // Text columns are read first so invalid routes are rejected before a Put is built.
        String departure = row.getCell(4).getStringCellValue();
        String destination = row.getCell(5).getStringCellValue();
        String city = row.getCell(6).getStringCellValue();
        if (destination.equals(city) || departure.equals(destination)) {
            return null;
        }

        // The unique trip id is the row key.
        String tripId = tripIdFormat.format(row.getCell(0).getNumericCellValue());
        Put put = new Put(Bytes.toBytes(tripId));

        // Begin and end time are stored as epoch milliseconds in decimal string form.
        long beginMillis = timeFormat.parse(row.getCell(1).getStringCellValue()).getTime();
        long endMillis = timeFormat.parse(row.getCell(2).getStringCellValue()).getTime();
        addText(put, "beginTime", String.valueOf(beginMillis));
        addText(put, "endTime", String.valueOf(endMillis));

        // Bicycle identifier, truncated to an int as in the source data handling.
        int bicycleId = (int) row.getCell(3).getNumericCellValue();
        addText(put, "bicycleId", String.valueOf(bicycleId));

        addText(put, "departure", departure);
        addText(put, "destination", destination);
        addText(put, "city", city);

        // Coordinates are stored as text with at most six fraction digits.
        addText(put, "start_longitude", coordinateFormat.format(row.getCell(7).getNumericCellValue()));
        addText(put, "start_latitude", coordinateFormat.format(row.getCell(8).getNumericCellValue()));
        addText(put, "stop_longitude", coordinateFormat.format(row.getCell(9).getNumericCellValue()));
        addText(put, "stop_latitude", coordinateFormat.format(row.getCell(10).getNumericCellValue()));
        return put;
    }

    /** Adds {@code value} as a string-encoded cell under the {@code info} family. */
    private static void addText(Put put, String qualifier, String value) {
        put.addColumn(FAMILY, Bytes.toBytes(qualifier), Bytes.toBytes(value));
    }

}


2.共享单车之数据分析

2.1统计共享单车每天的平均使用时间

package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import java.text.ParseException;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.math.RoundingMode;
import java.math.BigDecimal;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
/**
 * MapReduce job that computes the average total riding time per day.
 *
 * <p>The mapper reads {@code info:beginTime} and {@code info:endTime} (epoch milliseconds stored
 * as decimal strings) from {@code t_shared_bicycle}. The reducer sums the riding time of each
 * calendar day, averages those daily totals, converts the result to seconds and writes it to
 * row {@code avgTime}, column {@code info:avgTime}, of {@code t_bicycle_avgtime}.
 */
public class AveragetTimeMapReduce extends Configured implements Tool {
    /** Column family used for both reading and writing; UTF-8 encoded like the stored data. */
    public static final byte[] family = Bytes.toBytes("info");

    /** Emits one {@code "<yyyy-MM-dd>_<durationMillis>"} value per trip under a single key. */
    public static class MyMapper extends TableMapper<Text, BytesWritable> {
        @Override
        protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
                throws IOException, InterruptedException {
            long beginTime = Long.parseLong(Bytes.toString(result.getValue(family, Bytes.toBytes("beginTime"))));
            long endTime = Long.parseLong(Bytes.toString(result.getValue(family, Bytes.toBytes("endTime"))));
            // Day of the trip, formatted as yyyy-MM-dd.
            String day = DateFormatUtils.format(beginTime, "yyyy-MM-dd", Locale.CHINA);
            long useTime = endTime - beginTime;
            // A constant key sends every trip to the same reduce call.
            context.write(new Text("avgTime"), new BytesWritable(Bytes.toBytes(day + "_" + useTime)));
        }
    }

    /** Averages the per-day riding totals and stores the result in seconds with two decimals. */
    public static class MyTableReducer extends TableReducer<Text, BytesWritable, ImmutableBytesWritable> {
        @Override
        public void reduce(Text key, Iterable<BytesWritable> values, Context context)
                throws IOException, InterruptedException {
            // Total riding time in milliseconds, keyed by day.
            Map<String, Long> totalPerDay = new HashMap<String, Long>();
            for (BytesWritable value : values) {
                String[] parts = Bytes.toString(value.copyBytes()).split("_");
                String day = parts[0];
                long useTime = Long.parseLong(parts[1]);
                Long soFar = totalPerDay.get(day);
                totalPerDay.put(day, soFar == null ? useTime : soFar + useTime);
            }
            double sum = 0;
            for (Long dayTotal : totalPerDay.values()) {
                sum += dayTotal;
            }
            // reduce() is only invoked with at least one value, so the map is never empty here.
            int length = totalPerDay.size();
            // Milliseconds -> seconds, rounded to two decimals.
            BigDecimal average = new BigDecimal(sum / length / 1000).setScale(2, RoundingMode.HALF_DOWN);
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(family, Bytes.toBytes("avgTime"), Bytes.toBytes(average.toString()));
            // The output table is set by initTableReducerJob, so no key is needed here.
            context.write(null, put);
        }
    }

    /**
     * Creates the result table if possible and runs the job.
     *
     * @param args ignored; source and target table names are fixed
     * @return 0 when the job succeeds, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = HBaseUtil.conf;
        String arg1 = "t_shared_bicycle";
        String arg2 = "t_bicycle_avgtime";
        try {
            HBaseUtil.createTable(arg2, new String[] { "info" });
        } catch (Exception e) {
            // Table creation failed; the job is still attempted.
            e.printStackTrace();
        }
        Job job = configureJob(conf, new String[] { arg1, arg2 });
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Builds the job: full scan of {@code args[0]} as input, {@code args[1]} as output table. */
    private Job configureJob(Configuration conf, String[] args) throws IOException {
        String tablename = args[0];
        String targetTable = args[1];
        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, tablename);
        Scan scan = new Scan();
        scan.setCaching(300);
        scan.setCacheBlocks(false);// block caching must stay off for MapReduce scans
        TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, BytesWritable.class, job);
        TableMapReduceUtil.initTableReducerJob(targetTable, // output table
                MyTableReducer.class, // reducer class
                job);
        job.setNumReduceTasks(1);
        return job;
    }
}

2.2统计共享单车在指定地点的每天平均次数

package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
/**
 * MapReduce job that computes the average number of rides per day for one route.
 *
 * <p>The scan keeps rows whose {@code info:departure} equals "河北省保定市雄县" and whose
 * {@code info:destination} contains "韩庄村". The reducer counts rides per calendar day,
 * averages those counts and writes the result to column {@code info:avgNum} of
 * {@code t_bicycle_avgnum}.
 */
public class AverageVehicleMapReduce extends Configured implements Tool {
    /** Column family used for both reading and writing; UTF-8 encoded like the stored data. */
    public static final byte[] family = Bytes.toBytes("info");

    /** Emits the ride's day ({@code yyyy-MM-dd}) under a single route key. */
    public static class MyMapper extends TableMapper<Text, BytesWritable> {
        @Override
        protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
                throws IOException, InterruptedException {
            // Begin time is stored as epoch milliseconds in decimal string form.
            String beginTime = Bytes.toString(result.getValue(family, Bytes.toBytes("beginTime")));
            String day = DateFormatUtils.format(Long.parseLong(beginTime), "yyyy-MM-dd", Locale.CHINA);
            context.write(new Text("河北省保定市雄县-韩庄村"), new BytesWritable(Bytes.toBytes(day)));
        }
    }

    /** Averages the per-day ride counts and stores the result with two decimals. */
    public static class MyTableReducer extends TableReducer<Text, BytesWritable, ImmutableBytesWritable> {
        @Override
        public void reduce(Text key, Iterable<BytesWritable> values, Context context)
                throws IOException, InterruptedException {
            // Number of rides keyed by day.
            Map<String, Integer> ridesPerDay = new HashMap<String, Integer>();
            for (BytesWritable value : values) {
                String day = Bytes.toString(value.copyBytes());
                Integer soFar = ridesPerDay.get(day);
                ridesPerDay.put(day, soFar == null ? 1 : soFar + 1);
            }
            double sum = 0;
            for (Integer dayCount : ridesPerDay.values()) {
                sum += dayCount;
            }
            // reduce() is only invoked with at least one value, so the map is never empty here.
            int length = ridesPerDay.size();
            BigDecimal average = new BigDecimal(sum / length).setScale(2, RoundingMode.HALF_DOWN);
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(family, Bytes.toBytes("avgNum"), Bytes.toBytes(average.toString()));
            // The output table is set by initTableReducerJob, so no key is needed here.
            context.write(null, put);
        }
    }

    /**
     * Creates the result table if possible and runs the job.
     *
     * @param args ignored; source and target table names are fixed
     * @return 0 when the job succeeds, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = HBaseUtil.conf;
        String arg1 = "t_shared_bicycle";
        String arg2 = "t_bicycle_avgnum";
        try {
            HBaseUtil.createTable(arg2, new String[] { "info" });
        } catch (Exception e) {
            // Table creation failed; the job is still attempted.
            e.printStackTrace();
        }
        Job job = configureJob(conf, new String[] { arg1, arg2 });
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Builds the job: filtered scan of {@code args[0]} as input, {@code args[1]} as output. */
    private Job configureJob(Configuration conf, String[] args) throws IOException {
        String tablename = args[0];
        String targetTable = args[1];
        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, tablename);
        Scan scan = new Scan();
        scan.setCaching(300);
        scan.setCacheBlocks(false);// block caching must stay off for MapReduce scans

        // Both conditions are combined in one FilterList built with its default operator.
        ArrayList<Filter> listForFilters = new ArrayList<Filter>();
        Filter destinationFilter = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("destination"),
                CompareOperator.EQUAL, new SubstringComparator("韩庄村"));
        Filter departure = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("departure"),
                CompareOperator.EQUAL, Bytes.toBytes("河北省保定市雄县"));
        listForFilters.add(departure);
        listForFilters.add(destinationFilter);
        Filter filters = new FilterList(listForFilters);
        scan.setFilter(filters);

        TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, BytesWritable.class, job);
        TableMapReduceUtil.initTableReducerJob(targetTable, // output table
                MyTableReducer.class, // reducer class
                job);
        job.setNumReduceTasks(1);
        return job;
    }
}

2.3 统计共享单车指定车辆每次使用的空闲平均时间

package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
/**
 * MapReduce job that computes the average idle time, in hours, between rides of bicycle 5996.
 *
 * <p>The scan keeps rows whose {@code info:bicycleId} equals "5996". The result is written to
 * row {@code 5996}, column {@code info:freeTime}, of {@code t_bicycle_freetime}.
 */
public class FreeTimeMapReduce extends Configured implements Tool {
    /** Column family used for both reading and writing; UTF-8 encoded like the stored data. */
    public static final byte[] family = Bytes.toBytes("info");

    /** Milliseconds in one hour, used to convert the summed gaps to hours. */
    private static final long MILLIS_PER_HOUR = 60L * 60L * 1000L;

    /** Emits {@code "<beginMillis>_<endMillis>"} for every ride under the key "5996". */
    public static class MyMapper extends TableMapper<Text, BytesWritable> {
        @Override
        protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
                throws IOException, InterruptedException {
            long beginTime = Long.parseLong(Bytes.toString(result.getValue(family, Bytes.toBytes("beginTime"))));
            long endTime = Long.parseLong(Bytes.toString(result.getValue(family, Bytes.toBytes("endTime"))));
            context.write(new Text("5996"), new BytesWritable(Bytes.toBytes(beginTime + "_" + endTime)));
        }
    }

    /** Sums the gaps between consecutive values and stores their average in hours. */
    public static class MyTableReducer extends TableReducer<Text, BytesWritable, ImmutableBytesWritable> {
        @Override
        public void reduce(Text key, Iterable<BytesWritable> values, Context context)
                throws IOException, InterruptedException {
            long freeTime = 0;
            long beginTime = 0; // 0 means "no previous value seen yet"
            int length = 0;
            for (BytesWritable time : values) {
                String[] split = Bytes.toString(time.copyBytes()).split("_");
                if (beginTime == 0) {
                    beginTime = Long.parseLong(split[0]);
                    continue;
                }
                // Gap = begin time of the previously iterated ride - end time of this ride.
                // NOTE(review): this is a positive idle time only if values arrive newest
                // first; nothing in this job sorts the values -- confirm the expected order.
                freeTime = freeTime + beginTime - Long.parseLong(split[1]);
                beginTime = Long.parseLong(split[0]);
                length++;
            }
            if (length == 0) {
                // A single ride has no gap to average; previously this divided by zero.
                return;
            }
            // Exact decimal division keeps the fractional hours that long division discarded.
            BigDecimal hours = BigDecimal.valueOf(freeTime)
                    .divide(BigDecimal.valueOf(length * MILLIS_PER_HOUR), 2, RoundingMode.HALF_DOWN);
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(family, Bytes.toBytes("freeTime"), Bytes.toBytes(hours.toString()));
            // The output table is set by initTableReducerJob, so no key is needed here.
            context.write(null, put);
        }
    }

    /**
     * Creates the result table if possible and runs the job.
     *
     * @param args ignored; source and target table names are fixed
     * @return 0 when the job succeeds, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = HBaseUtil.conf;
        String arg1 = "t_shared_bicycle";
        String arg2 = "t_bicycle_freetime";
        try {
            HBaseUtil.createTable(arg2, new String[] { "info" });
        } catch (Exception e) {
            // Table creation failed; the job is still attempted.
            e.printStackTrace();
        }
        Job job = configureJob(conf, new String[] { arg1, arg2 });
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Builds the job: scan of {@code args[0]} filtered to bicycle 5996, output {@code args[1]}. */
    private Job configureJob(Configuration conf, String[] args) throws IOException {
        String tablename = args[0];
        String targetTable = args[1];
        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, tablename);
        Scan scan = new Scan();
        scan.setCaching(300);
        scan.setCacheBlocks(false);// block caching must stay off for MapReduce scans
        Filter filter = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("bicycleId"),
                CompareOperator.EQUAL, Bytes.toBytes("5996"));
        scan.setFilter(filter);
        TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, BytesWritable.class, job);
        TableMapReduceUtil.initTableReducerJob(targetTable, // output table
                MyTableReducer.class, // reducer class
                job);
        job.setNumReduceTasks(1);
        return job;
    }
}

2.4 统计指定时间共享单车使用次数

package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
/**
 * MapReduce job that counts the rides taken within a fixed time window.
 *
 * <p>The scan keeps rows whose {@code info:beginTime} is at or after 2017-08-01 and whose
 * {@code info:endTime} is at or before 2017-09-01. The count is written to row
 * {@code departure}, column {@code info:usageRate}, of {@code t_bicycle_usagerate}.
 */
public class UsageRateMapReduce extends Configured implements Tool {
    /** Column family used for both reading and writing; UTF-8 encoded like the stored data. */
    public static final byte[] family = Bytes.toBytes("info");

    /** Emits a count of 1 for every row that passes the scan filter. */
    public static class MyMapper extends TableMapper<Text, IntWritable> {
        @Override
        protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
                throws IOException, InterruptedException {
            context.write(new Text("departure"), new IntWritable(1));
        }
    }

    /** Sums the counts and stores the total as a decimal string. */
    public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int totalNum = 0;
            for (IntWritable num : values) {
                totalNum += num.get();
            }
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn(family, Bytes.toBytes("usageRate"), Bytes.toBytes(String.valueOf(totalNum)));
            // The output table is set by initTableReducerJob, so no key is needed here.
            context.write(null, put);
        }
    }

    /**
     * Creates the result table if possible and runs the job.
     *
     * @param args ignored; source and target table names are fixed
     * @return 0 when the job succeeds, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = HBaseUtil.conf;
        String arg1 = "t_shared_bicycle";
        String arg2 = "t_bicycle_usagerate";
        try {
            HBaseUtil.createTable(arg2, new String[] { "info" });
        } catch (Exception e) {
            // Table creation failed; the job is still attempted.
            e.printStackTrace();
        }
        Job job = configureJob(conf, new String[] { arg1, arg2 });
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Builds the job: time-window scan of {@code args[0]} as input, {@code args[1]} as output.
     *
     * @throws IOException if the window bounds cannot be parsed or the job cannot be set up
     */
    private Job configureJob(Configuration conf, String[] args) throws IOException {
        String tablename = args[0];
        String targetTable = args[1];
        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, tablename);
        ArrayList<Filter> listForFilters = new ArrayList<Filter>();
        FastDateFormat instance = FastDateFormat.getInstance("yyyy-MM-dd");
        Scan scan = new Scan();
        scan.setCaching(300);
        scan.setCacheBlocks(false);// block caching must stay off for MapReduce scans
        try {
            // Bounds are compared as byte strings of the decimal millisecond value.
            // NOTE(review): byte-wise ordering matches numeric ordering only while all stored
            // timestamps have the same number of digits -- confirm for the data set.
            Filter beginAtOrAfter = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("beginTime"),
                    CompareOperator.GREATER_OR_EQUAL,
                    Bytes.toBytes(String.valueOf(instance.parse("2017-08-01").getTime())));
            Filter endAtOrBefore = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("endTime"),
                    CompareOperator.LESS_OR_EQUAL,
                    Bytes.toBytes(String.valueOf(instance.parse("2017-09-01").getTime())));
            listForFilters.add(endAtOrBefore);
            listForFilters.add(beginAtOrAfter);
        } catch (java.text.ParseException e) {
            // Fail with the cause instead of returning null, which made run() hit an NPE.
            throw new IOException("Could not parse the time window bounds", e);
        }
        Filter filters = new FilterList(listForFilters);
        scan.setFilter(filters);
        TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob(targetTable, // output table
                MyTableReducer.class, // reducer class
                job);
        job.setNumReduceTasks(1);
        return job;
    }
}

2.5 统计共享单车线路流量

package com.educoder.bigData.sharedbicycle;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import com.educoder.bigData.util.HBaseUtil;
/**
 * MapReduce job that counts how many rides were taken on each route.
 *
 * <p>A route is identified by start coordinates, stop coordinates, departure and destination as
 * stored in the {@code info} family of {@code t_shared_bicycle}. One row per route is written
 * to {@code t_bicycle_linetotal} with the count in column {@code info:lineTotal}.
 */
public class LineTotalMapReduce extends Configured implements Tool {
    /** Column family used for both reading and writing; UTF-8 encoded like the stored data. */
    public static final byte[] family = Bytes.toBytes("info");

    /** Emits a count of 1 keyed by the route description of each ride. */
    public static class MyMapper extends TableMapper<Text, IntWritable> {
        @Override
        protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
                throws IOException, InterruptedException {
            // Start coordinates.
            String start_latitude = Bytes.toString(result.getValue(family, Bytes.toBytes("start_latitude")));
            String start_longitude = Bytes.toString(result.getValue(family, Bytes.toBytes("start_longitude")));
            // Stop coordinates.
            String stop_latitude = Bytes.toString(result.getValue(family, Bytes.toBytes("stop_latitude")));
            String stop_longitude = Bytes.toString(result.getValue(family, Bytes.toBytes("stop_longitude")));
            // Place names.
            String departure = Bytes.toString(result.getValue(family, Bytes.toBytes("departure")));
            String destination = Bytes.toString(result.getValue(family, Bytes.toBytes("destination")));
            // Key layout: "<startLat>-<startLng>_<stopLat>-<stopLng>_<departure>-<destination>".
            String route = start_latitude + "-" + start_longitude + "_" + stop_latitude + "-" + stop_longitude
                    + "_" + departure + "-" + destination;
            context.write(new Text(route), new IntWritable(1));
        }
    }

    /** Sums the counts of one route and stores the total as a decimal string. */
    public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int totalNum = 0;
            for (IntWritable num : values) {
                totalNum += num.get();
            }
            // The row key is the route key with the total appended directly, without separator.
            // NOTE(review): a rerun with a different total creates a new row instead of
            // overwriting the old one -- confirm readers of this table rely on this key shape.
            Put put = new Put(Bytes.toBytes(key.toString() + totalNum));
            put.addColumn(family, Bytes.toBytes("lineTotal"), Bytes.toBytes(String.valueOf(totalNum)));
            // The output table is set by initTableReducerJob, so no key is needed here.
            context.write(null, put);
        }
    }

    /**
     * Creates the result table if possible and runs the job.
     *
     * @param args ignored; source and target table names are fixed
     * @return 0 when the job succeeds, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = HBaseUtil.conf;
        String arg1 = "t_shared_bicycle";
        String arg2 = "t_bicycle_linetotal";
        try {
            HBaseUtil.createTable(arg2, new String[] { "info" });
        } catch (Exception e) {
            // Table creation failed; the job is still attempted.
            e.printStackTrace();
        }
        Job job = configureJob(conf, new String[] { arg1, arg2 });
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Builds the job: full scan of {@code args[0]} as input, {@code args[1]} as output table. */
    private Job configureJob(Configuration conf, String[] args) throws IOException {
        String tablename = args[0];
        String targetTable = args[1];
        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, tablename);
        Scan scan = new Scan();
        scan.setCaching(300);
        scan.setCacheBlocks(false);// block caching must stay off for MapReduce scans
        TableMapReduceUtil.initTableMapperJob(tablename, scan, MyMapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob(targetTable, // output table
                MyTableReducer.class, // reducer class
                job);
        job.setNumReduceTasks(1);
        return job;
    }
}

3.共享单车之数据可视化

3.1绘制地图

<%@ page language="java" contentType="text/html; charset=utf-8"
         pageEncoding="utf-8"%>
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <style type="text/css">
        body, html,#allmap {
            width: 100%;
            height: 100%;
            overflow: hidden;
            margin:0;
        }
    </style>
    <script type="text/javascript" src="http://api.map.baidu.com/api?v=1.4"></script>
    <title>step1</title>
</head>
<body>
<div id="allmap"></div>
</body>
</html>
<script type="text/javascript">
    var map = new BMap.Map("allmap");// Create the map instance bound to the "allmap" element
    var point = new BMap.Point(116.10 ,38.98);// Create the point used as the map center
    map.centerAndZoom(point, 13);// Initialize the map. When center is a Point, zoom must be given (levels 3-19; 3-18 when the high-definition base map for mobile is used). When center is a string such as "北京", zoom may be omitted and the map picks the best level for that center.
    map.addControl(new BMap.NavigationControl());// Zoom/pan control
    map.enableScrollWheelZoom();// Allow zooming with the mouse wheel
    var start_longitude=116.233093;// Start longitude
    var start_latitude=39.04607;// Start latitude
    var stop_longitude=116.235352;// Stop longitude
    var stop_latitude=39.041691;// Stop latitude
    var address=["乡里乡情铁锅炖南228米","擎天矿用材料有限公司北609米"];// Labels for the start and stop markers, in that order

	/**********   Begin   **********/
    //1.初始化路程线
    <%@ page language="java" contentType="text/html; charset=utf-8"
         pageEncoding="utf-8"%>
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <style type="text/css">
        body, html,#allmap {
            width: 100%;
            height: 100%;
            overflow: hidden;
            margin:0;
        }
    </style>
    <script type="text/javascript" src="http://api.map.baidu.com/api?v=1.4"></script>
    <title>step1</title>
</head>
<body>
<div id="allmap"></div>
</body>
</html>
<script type="text/javascript">
    var map = new BMap.Map("allmap");// 创建地图实例
    var point = new BMap.Point(116.10 ,38.98);// 创建点坐标
    map.centerAndZoom(point, 13);//设初始化地图。 如果center类型为Point时,zoom必须赋值,范围3-19级,若调用高清底图(针对移动端开发)时,zoom可赋值范围为3-18级。如果center类型为字符串时,比如“北京”,zoom可以忽略,地图将自动根据center适配最佳zoom级别
    map.addControl(new BMap.NavigationControl());//缩放平移控件
    map.enableScrollWheelZoom();//利用鼠标滚轮控制大小
    var start_longitude=116.233093;//开始经度
    var start_latitude=39.04607;//开始纬度
    var stop_longitude=116.235352;//结束经度
    var stop_latitude=39.041691;//结束纬度
    var address=["乡里乡情铁锅炖南228米","擎天矿用材料有限公司北609米"];
 
	/**********   Begin   **********/
 
    //1.初始化路程线
    var polyline = new BMap.Polyline([  
    new BMap.Point(start_longitude, start_latitude),  
    new BMap.Point(stop_longitude, stop_latitude)  
    ],{strokeColor:"red",strokeWeight:3,strokeOpacity:0.5});//创建一条宽度为3外边框透明度为0.5的红色线  
 
    //2.将线添加到地图上
   map.addOverlay(polyline);
 
    //3.调用绘制箭头线函数
   addArrow(polyline,10,Math.PI/7);
 
    //4.设置起始点标注
 
    //①先设置起点位置
    var marker = new BMap.Marker(new BMap.Point(start_longitude,start_latitude)); //创建 
    起点位置标注  
    var label = new BMap.Label(address[0], {offset: new BMap.Size(20, 0)});//给标注设置文 
    字描述  //调用上面已经定义的address数组里面的第一个值,即起点的位置。
    marker.setLabel(label);//将文字描述设置到标注上  
    map.addOverlay(marker);//将标注添加到地图中  
 
   //①再设置终点位置
    var marker = new BMap.Marker(new BMap.Point(stop_longitude,stop_latitude)); //创建终点 
    位置标注  
    var label = new BMap.Label(address[1], {offset: new BMap.Size(20, 0)});//给标注设置文 
    字描述  //调用上面已经定义的address数组里面的第二个值,即终点的位置。
    marker.setLabel(label);//将文字描述设置到标注上  
    map.addOverlay(marker);//将标注添加到地图中  
 
 
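// Note: the exercise states "use the predefined variables directly and do not rename them", so for the start and end annotations keep the variable names `marker` and `label` exactly as given in the reference snippet.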
 
	/**********   End   **********/
 
    //绘制箭头的函数
    /**
     * Draws a blue arrow head at the end of every segment of a polyline.
     *
     * Uses the page-level `map` object for geo/pixel conversion and for adding
     * the arrow overlays, and `BMap` for constructing pixels and polylines.
     *
     * @param polyline overlay whose getPath() returns the ordered points of the line
     * @param r        length of the arrow head, measured back along the segment in pixels
     * @param angle    half opening angle of the arrow head, in radians
     * @returns the arrow overlay of the first drawn segment, or undefined when nothing was drawn
     */
    function addArrow(polyline,r,angle){
        var linePoint=polyline.getPath(); // points of the line, in drawing order
        var halfWidth=Math.tan(angle)*r;  // distance of each barb from the segment axis
        var firstArrow;
        for(var i=1;i<linePoint.length;i++){ // one arrow per segment end
            var pixelStart=map.pointToPixel(linePoint[i-1]);
            var pixelEnd=map.pointToPixel(linePoint[i]);
            var dx=pixelEnd.x-pixelStart.x;
            var dy=pixelEnd.y-pixelStart.y;
            var length=Math.sqrt(dx*dx+dy*dy);
            if(length===0){
                continue; // both ends map to the same pixel: no direction to point at
            }
            // A unit direction vector is used instead of the slope dy/dx, so a
            // vertical segment (dx === 0) no longer produces NaN coordinates.
            var ux=dx/length;
            var uy=dy/length;
            // Foot of the arrow head: r pixels back from the tip along the segment.
            var baseX=pixelEnd.x-r*ux;
            var baseY=pixelEnd.y-r*uy;
            // The two barbs sit on either side of the foot, perpendicular to the segment.
            var pointArrow=map.pixelToPoint(new BMap.Pixel(baseX+halfWidth*uy,baseY-halfWidth*ux));
            var pointArrow1=map.pixelToPoint(new BMap.Pixel(baseX-halfWidth*uy,baseY+halfWidth*ux));
            var arrow = new BMap.Polyline([
                pointArrow,linePoint[i],pointArrow1
            ], {strokeColor:"blue", strokeWeight:3, strokeOpacity:0.5});
            map.addOverlay(arrow);
            if(firstArrow===undefined){
                firstArrow=arrow; // keep the historical return value (first arrow)
            }
        }
        return firstArrow;
    }
</script>


    //2.将线添加到地图上


    //3.调用绘制箭头线函数


    //4.设置起始点标注
    

	/**********   End   **********/

    //绘制箭头的函数
    /**
     * Draws a blue arrow head at the end of every segment of a polyline.
     *
     * Uses the page-level `map` object for geo/pixel conversion and for adding
     * the arrow overlays, and `BMap` for constructing pixels and polylines.
     *
     * @param polyline overlay whose getPath() returns the ordered points of the line
     * @param r        length of the arrow head, measured back along the segment in pixels
     * @param angle    half opening angle of the arrow head, in radians
     * @returns the arrow overlay of the first drawn segment, or undefined when nothing was drawn
     */
    function addArrow(polyline,r,angle){
        var linePoint=polyline.getPath(); // points of the line, in drawing order
        var halfWidth=Math.tan(angle)*r;  // distance of each barb from the segment axis
        var firstArrow;
        for(var i=1;i<linePoint.length;i++){ // one arrow per segment end
            var pixelStart=map.pointToPixel(linePoint[i-1]);
            var pixelEnd=map.pointToPixel(linePoint[i]);
            var dx=pixelEnd.x-pixelStart.x;
            var dy=pixelEnd.y-pixelStart.y;
            var length=Math.sqrt(dx*dx+dy*dy);
            if(length===0){
                continue; // both ends map to the same pixel: no direction to point at
            }
            // A unit direction vector is used instead of the slope dy/dx, so a
            // vertical segment (dx === 0) no longer produces NaN coordinates.
            var ux=dx/length;
            var uy=dy/length;
            // Foot of the arrow head: r pixels back from the tip along the segment.
            var baseX=pixelEnd.x-r*ux;
            var baseY=pixelEnd.y-r*uy;
            // The two barbs sit on either side of the foot, perpendicular to the segment.
            var pointArrow=map.pixelToPoint(new BMap.Pixel(baseX+halfWidth*uy,baseY-halfWidth*ux));
            var pointArrow1=map.pixelToPoint(new BMap.Pixel(baseX-halfWidth*uy,baseY+halfWidth*ux));
            var arrow = new BMap.Polyline([
                pointArrow,linePoint[i],pointArrow1
            ], {strokeColor:"blue", strokeWeight:3, strokeOpacity:0.5});
            map.addOverlay(arrow);
            if(firstArrow===undefined){
                firstArrow=arrow; // keep the historical return value (first arrow)
            }
        }
        return firstArrow;
    }
</script>

3.2绘制流量最高的五条线路的路程图

//servlet里的代码
package com.educoder.servlet;
 
import com.educoder.util.HBaseUtil;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
 
/**
 * Servlet that loads the data returned by {@code HBaseUtil.scanTable()},
 * publishes every entry as a request attribute and forwards to {@code step2.jsp}.
 */
public class BickMapServlet extends HttpServlet {

    /**
     * Handles GET: sets the request/response encoding, copies each key of the
     * scanned data into a request attribute and forwards to the JSP view.
     *
     * @param request  the incoming request; receives one attribute per key
     * @param response the response handed on to the JSP
     * @throws ServletException if forwarding fails
     * @throws IOException      if forwarding fails with an I/O error
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        request.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");

        /**********   Begin   **********/

        // 1. Fetch the data through the HBaseUtil helper.
        final BickMap scanned = HBaseUtil.scanTable();

        // 2. Hand every key/value pair to the JSP as a request attribute.
        for (final String field : scanned.getKeys()) {
            final Object values = scanned.get(field);
            request.setAttribute(field, values);
        }

        /**********   End    **********/
        request.getRequestDispatcher("step2.jsp").forward(request, response);
    }

    /**
     * Handles POST exactly like GET.
     *
     * @param req  the incoming request
     * @param resp the outgoing response
     * @throws ServletException see {@link #doGet}
     * @throws IOException      see {@link #doGet}
     */
    @Override
    protected void doPost(HttpServletRequest req, HttpServletResponse resp)
            throws ServletException, IOException {
        doGet(req, resp);
    }
}
 

4.共享单车之租赁需求预估

4.1数据探索与可视化

import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

#********* Begin *********#
import pandas as pd

import matplotlib.pyplot as plt

# Load the training data.
train_df = pd.read_csv('./step1/bike_train.csv')

# Hour of day: second whitespace-separated token of `datetime`, text before the first ':'.
clock_text = train_df['datetime'].str.split().str[1]
train_df['hour'] = clock_text.str.split(':').str[0].astype('int')

# Mean of the three rental counters for each hour.
hour_mean = train_df.groupby('hour')[['count', 'registered', 'casual']].mean()

# Plot the mean total count per hour on a 10x10 inch figure and save it.
fig = plt.figure(figsize=(10, 10))
axes = fig.add_subplot(1, 1, 1)
axes.plot(hour_mean['count'])
axes.set_title('average count per hour')
fig.savefig('./step1/result/plot.png')

#********* End *********#

4.2特征工程

import pandas as pd
import numpy as np
from datetime import datetime

def transform_data(train_df):
    '''
    Split the ``datetime`` text column into date, hour, year, month and weekday columns.

    The columns are added to ``train_df`` itself, in the order
    date, hour, year, month, weekday. ``weekday`` is the ISO weekday
    (Monday = 1 ... Sunday = 7) of the ``%Y-%m-%d`` date part.

    :param train_df: DataFrame read from bike_train.csv, with a text ``datetime`` column
    :return: the same DataFrame object, now carrying the five new columns
    '''

    #********* Begin *********#
    tokens = train_df['datetime'].str.split()   # [date text, time text] per row
    date_text = tokens.str[0]
    date_fields = date_text.str.split('-')      # [year, month, day] per row

    train_df['date'] = date_text
    train_df['hour'] = tokens.str[1].str.split(':').str[0].astype('int')
    train_df['year'] = date_fields.str[0].astype('int')
    train_df['month'] = date_fields.str[1].astype('int')
    train_df['weekday'] = train_df['date'].map(
        lambda day: datetime.strptime(day, '%Y-%m-%d').isoweekday()
    )
    return train_df

    #********* End **********#

4.3租赁需求预估

#********* Begin *********#
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.linear_model import Ridge

# Columns that are one-hot encoded (in this order) and the prefixes they produce.
ONE_HOT_COLUMNS = ['month', 'year', 'season', 'weather']
ONE_HOT_PREFIXES = tuple(name + '_' for name in ONE_HOT_COLUMNS)
# Columns removed from both feature matrices before fitting / predicting.
DROPPED_COLUMNS = ['datetime', 'season', 'weather', 'atemp', 'date', 'month']


def add_time_features(df):
    """Add date, hour, year, month and weekday columns parsed from df['datetime'].

    The columns are added to ``df`` in place; ``weekday`` is the ISO weekday
    (Monday = 1 ... Sunday = 7).

    :param df: DataFrame with a text ``datetime`` column
    :return: the same DataFrame object
    """
    df['date'] = df.datetime.apply(lambda x: x.split()[0])
    df['hour'] = df.datetime.apply(lambda x: x.split()[1].split(':')[0]).astype('int')
    df['year'] = df.datetime.apply(lambda x: x.split()[0].split('-')[0]).astype('int')
    df['month'] = df.datetime.apply(lambda x: x.split()[0].split('-')[1]).astype('int')
    df['weekday'] = df.date.apply(lambda x: datetime.strptime(x, '%Y-%m-%d').isoweekday())
    return df


def encode_features(df, drop_columns):
    """Append one-hot columns for ONE_HOT_COLUMNS and drop ``drop_columns``.

    :param df: DataFrame that already has the time features
    :param drop_columns: column names to remove from the result
    :return: a new DataFrame; ``df`` itself is not modified
    """
    dummies = [pd.get_dummies(df[name], prefix=name) for name in ONE_HOT_COLUMNS]
    return pd.concat([df] + dummies, axis=1).drop(drop_columns, axis=1)


train_df = pd.read_csv('./step3/bike_train.csv')
# Discard outliers: keep rows whose count lies within 3 standard deviations of the mean.
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a slice of the original frame.
train_df = train_df[np.abs(train_df['count'] - train_df['count'].mean()) <= 3 * train_df['count'].std()].copy()
add_time_features(train_df)
train_label = train_df['count']
train_df_back = encode_features(train_df, DROPPED_COLUMNS + ['count'])

test_df = pd.read_csv('./step3/bike_test.csv')
add_time_features(test_df)
test_df_back = encode_features(test_df, DROPPED_COLUMNS)

# One-hot columns are derived per file, so a category that never occurs in the
# test file would leave the two matrices misaligned. Add such columns as all
# zeros, then put the test columns in the training order.
for column in train_df_back.columns:
    if column not in test_df_back.columns and column.startswith(ONE_HOT_PREFIXES):
        test_df_back[column] = 0
test_df_back = test_df_back[train_df_back.columns]

clf = Ridge(alpha=1.0)
# Train
clf.fit(train_df_back, train_label)
# Predict
count = clf.predict(test_df_back)
# Save the result
result = pd.DataFrame({'datetime': test_df['datetime'], 'count': count})
result.to_csv('./step3/result.csv', index=False)
#********* End *********#

  • 16
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值