Hadoop: computing an average time

   Computing the average order-completion time from Sqoop-imported data, accurate to 0.1 days

package hdfs.demo3;


import hdfs.constants.DemoLineInputFormat;
import hdfs.constants.Utils;

import java.io.IOException;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


public class Demo3 {  
 
	// Parse the Sqoop-imported ORDR_MAIN records stored in HDFS (one row per line).
	// DAYTIME   is the field at index 51
	// RCV_DATE  is the field at index 47
	// DISPOSEID is the field at index 24
	public static class MapHDFS extends Mapper<LongWritable, Text, LongWritable, Text> {
		LongWritable rkey = new LongWritable();
		Text rval = new Text();
		long time; // time taken by one order, in seconds
		String[] strs;

		public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// Fields are separated by '\0x001' (see the Sqoop command in main()).
			strs = value.toString().split("\1");

			Date startTime = Utils.getDate(strs[51]);
			Date stopTime = Utils.getDate(strs[47]);
			if (startTime == null || stopTime == null) {
				return;
			}
			time = stopTime.getTime() / 1000 - startTime.getTime() / 1000; // milliseconds -> seconds
			if (time < 0) { // bad record
				return;
			}
			rkey.set(time % 1000);      // bucket key; only used to group records for the combiner
			rval.set(time / 3600 + ""); // duration rounded down to whole hours
			context.write(rkey, rval);  // handed to the combiner for partial aggregation
		}
	}
	  
	// Combiner: pre-aggregates the per-record hour values into "#count#sum" pairs.
	// Everything is written under the single key 0 (wkey is never re-set), so the
	// reducer ends up with one group and produces one global average.
	static class Combin extends Reducer<LongWritable, Text, LongWritable, Text> {
		LongWritable wkey = new LongWritable(0);
		Text val = new Text();

		public void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
			long sum = 0;
			long count = 0;
			for (Text v : values) {
				count++;
				sum += Long.valueOf(v.toString());
			}
			val.set("#" + count + "#" + sum);
			context.write(wkey, val);
		}
	}

	// Reducer: merges the combiner's "#count#sum" pairs and formats the global average.
	public static class Reduce extends Reducer<LongWritable, Text, Text, Text> {
		Text writKey = new Text();
		Text writValue = new Text();

		public void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
			long count = 0;
			long sum = 0;
			String[] strs;
			for (Text element : values) {
				strs = element.toString().split("#"); // yields "", count, sum (the value starts with '#')
				count += Long.valueOf(strs[1]); // number of orders
				sum += Long.valueOf(strs[2]);   // total duration in hours
			}
			Timestamp tt = new Timestamp(sum / count * 3600 * 1000); // average duration, back in milliseconds
			writKey.set(count + "");
			writValue.set(getDate(tt.getTime()) + "\t" + getDayDate(tt.getTime()));
			context.write(writKey, writValue);
		}
	}
	
	/**
	 * Formats an elapsed time as days, accurate to 0.1 days.
	 * @param tim elapsed time in milliseconds
	 * @return formatted string
	 */
	private static String getDayDate(Long tim) {
		String str = tim + " ms:\t";
		str += new BigDecimal(tim / (1000 * 60 * 60 * 24.0)).setScale(1, BigDecimal.ROUND_HALF_UP) + " days ";
		return str;
	}
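
	/**
	 * Formats an elapsed time (milliseconds) as whole hours.
	 * Note: the reducer above calls getDate(), but the original post does not show it;
	 * this is an assumed minimal implementation added so the class compiles.
	 */
	private static String getDate(Long tim) {
		return tim / (1000 * 60 * 60) + " hours";
	}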
	/**
	 * @param args
	 * @throws IOException
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		
		/**
		 * ./sqoop import --connect jdbc:oracle:thin:@10.10.35.65:1521/crm_standby  
		 *  --username CRMDEV --password crmdev --table ORDR_MAIN 
		 * --where 'rownum<1000' --fields-terminated-by '\0x001' --lines-terminated-by '\0x002'
		 */
		String outputPath="hdfs://172.19.121.125:9000/user/admin/ORDR_MAIN";
		new Demo3().avgTime(outputPath,conf);
	}
	 
	
	private void avgTime(String path, Configuration conf) {
		Path outPath = new Path(path + "_avg");
		try {
			// Remove the output directory if it already exists, otherwise the job would fail.
			FileSystem fs = outPath.getFileSystem(conf);
			fs.delete(outPath, true);
		} catch (IOException e1) {
			// ignore: the path may simply not exist yet
		}
		try {
			Job job = new Job(conf, "tAvgTime");

			// mapper / combiner / reducer
			job.setMapperClass(MapHDFS.class);
			job.setCombinerClass(Combin.class);
			job.setReducerClass(Reduce.class);

			// map output types (bucket key -> duration in hours)
			job.setMapOutputKeyClass(LongWritable.class);
			job.setMapOutputValueClass(Text.class);
			// final output types (order count -> formatted average)
			job.setOutputKeyClass(Text.class);
			job.setOutputValueClass(Text.class);

			// custom input format that splits records on the Sqoop line separator '\2'
			job.setInputFormatClass(DemoLineInputFormat.class);
			job.setOutputFormatClass(TextOutputFormat.class);

			// input and output paths
			FileInputFormat.addInputPath(job, new Path(path));
			FileOutputFormat.setOutputPath(job, outPath);

			job.waitForCompletion(true);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
	
	
}
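
The mapper above relies on Utils.getDate(String) (imported from hdfs.constants), which the post does not show. Below is a minimal sketch of what such a helper could look like; the date format is an assumption, based on Sqoop writing Oracle DATE columns in java.sql.Timestamp's string form, and returning null on failure matches the null check in the mapper:

package hdfs.constants;

import java.sql.Timestamp;
import java.util.Date;

public class Utils {

	/**
	 * Parses a Sqoop-exported date/time field into a java.util.Date.
	 * Returns null when the field is empty or unparseable, which is what
	 * MapHDFS checks for before using the value.
	 * (Assumed implementation; the original helper is not shown in the post.)
	 */
	public static Date getDate(String s) {
		if (s == null || s.trim().isEmpty()) {
			return null;
		}
		try {
			// Assumes the field looks like "yyyy-MM-dd HH:mm:ss[.f]",
			// which java.sql.Timestamp can parse directly.
			return new Date(Timestamp.valueOf(s.trim()).getTime());
		} catch (IllegalArgumentException e) {
			return null; // bad record -> skipped by the mapper
		}
	}
}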

 

Because the Sqoop command shown in main() specifies custom field and line separators, a custom LineReader is needed to split the imported file into records.

       Following the same approach used for reading plain text files:

Copy org.apache.hadoop.mapreduce.lib.input.LineInputFormat as DemoLineInputFormat

Copy org.apache.hadoop.mapreduce.lib.input.LineRecordReader as DemoLineRecordReader

Copy org.apache.hadoop.util.LineReader as DemoLineReader

 

         Set the job's InputFormat for reading the file:

job.setInputFormatClass(DemoLineInputFormat.class);

          Then update the references between the copied classes (a sketch of the result follows the next two items):

 In DemoLineInputFormat, replace LineRecordReader with DemoLineRecordReader

 In DemoLineRecordReader, replace LineReader with DemoLineReader
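
A minimal sketch of DemoLineInputFormat after that change (illustrative only, not the full copied class: the real copy also keeps whatever splitting/compression handling the original input format had, and DemoLineRecordReader is assumed to exist as described above):

package hdfs.constants;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class DemoLineInputFormat extends FileInputFormat<LongWritable, Text> {

	@Override
	public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
		// Hand out the copied record reader so that DemoLineReader
		// (with LF = '\2') is the class that actually splits records.
		return new DemoLineRecordReader();
	}
}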

 

           Finally, change the LF constant in DemoLineReader to the line separator used in the Sqoop command, '\2':

private static final byte LF = '\2'; // default is '\n', the line terminator of an ordinary HDFS text file
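
Depending on the Hadoop version, there is an alternative that avoids copying the three classes: newer releases let the stock TextInputFormat / LineRecordReader pick up a custom record delimiter from the textinputformat.record.delimiter configuration key. A sketch of that variant (not what this post does, and only applicable if your Hadoop build supports the key):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class DelimiterConfigSketch {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// Tell LineRecordReader to end records at '\2' instead of '\n'.
		conf.set("textinputformat.record.delimiter", "\2");

		Job job = new Job(conf, "tAvgTime");
		job.setInputFormatClass(TextInputFormat.class);
		// ... the rest of the job setup stays the same as in avgTime() above ...
	}
}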

 

 
