算法(3) 移动平均算法 moving average

移动平均通常用于处理时间序列数据。什么是时间序列? 所谓时间序列,是指与时间关系极其密切的数据,比如股票数据,每只股票的价格随秒、分、小时、天而变化,一旦时间错乱,数据就完全无效;再比如监控数据,还有一些工业上的设备数据。因此现在有专门的时间序列数据库,例如OpenTSDB。

移动平均的具体概念这里不做介绍了,有很多资料可以查询。这里只展示通过MapReduce来处理移动平均算法的代码。以下是简单移动平均的处理过程。


原始数据如下:

gold,2017-11-11,89
gold,2017-11-12,189
gold,2017-11-13,289
gold,2017-11-14,389
gold,2017-11-15,489
gold,2017-11-16,589
gold,2017-11-17,689
gold,2017-11-18,789
gold,2017-11-19,889
gold,2017-11-20,989
dog,2017-11-13,19
dog,2017-11-14,29
dog,2017-11-15,39
dog,2017-11-16,49
dog,2017-11-17,59
dog,2017-11-18,69
dog,2017-11-19,89
dog,2017-11-20,99


我们假设以上是2个股票,分别是gold, dog,  然后上面最右边的数据是每天的收盘价, 现在要计算这2个股票以3天为单位的平均数。 以gold为例:

第一天: 89; 第二天: (89+189)/2 = 139; 第三天: (89+189+289)/3 = 189; 第四天: (189+289+389)/3 = 289; 以此类推。


移动平均刚才说了,时间顺序非常重要,所以所有的数据必须先根据时间排序。 要通过mapreduce实现,那么先要实现二次排序,在二次排序中介绍过,mapreduce只会对key排序.  所以要对时间排序,我们也要先实现二次排序,然后拿排好序的数据去做移动平均。


代码如下:

package com.isesol.mapreduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;

import org.apache.curator.framework.recipes.barriers.DistributedDoubleBarrier;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.cloudera.io.netty.handler.codec.http.HttpContentEncoder.Result;
import com.sun.org.apache.bcel.internal.generic.NEW;


public class movingAverage {

	public static class TokenizerMapper extends Mapper<Object, Text, compositekey, Text> {

		private Text data = new Text();
		private compositekey newkey = new compositekey();

		public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
			String[] val = value.toString().split(",");
			String stock = val[0];
			String time = val[1];
			String price = val[2];
			newkey.setStock(stock);
			newkey.setTime(val[1]);

			context.write(newkey, new Text(price));
		}

	}

	public static class twopartitions extends Partitioner<compositekey, Text> implements Configurable {

		public int getPartition(compositekey key, Text value, int numPartitions) {
			// TODO Auto-generated method stub

			return (key.getStock().hashCode() & Integer.MAX_VALUE) % numPartitions;
		}

		public void setConf(Configuration conf) {
			// TODO Auto-generated method stub
		}

		public Configuration getConf() {
			// TODO Auto-generated method stub
			return null;
		}

	}

	public static class compositekeyComparator extends WritableComparator {

		public compositekeyComparator() {
			super(compositekey.class, true);
		}

		public int compare(WritableComparable a, WritableComparable b) {
			compositekey a1 = (compositekey) a;
			compositekey b1 = (compositekey) b;

			int compare = a1.getStock().compareTo(b1.getStock());
			if (compare != 0) {
				return compare;
			} else {
				return a1.getTime().compareTo(b1.getTime());
			}

		}

	}

	public static class compositekey implements WritableComparable<compositekey> {

		private String stock;
		private String time;

		public void setStock(String stock) {

			this.stock = stock;
		}

		public String getStock() {

			return this.stock;
		}

		public void setTime(String time) {
			this.time = time;
		}

		public String getTime() {
			return time;
		}

		public void write(DataOutput out) throws IOException {
			// TODO Auto-generated method stub
			out.writeUTF(this.getStock());
			out.writeUTF(this.getTime());
		}

		public void readFields(DataInput in) throws IOException {
			// TODO Auto-generated method stub

			stock = in.readUTF();
			time = in.readUTF();
		}

		public String toString() {
			return stock + "," + time;
		}

		public int compareTo(compositekey o) {
			// TODO Auto-generated method stub
			return 0;
		}

	}

	/**
	 * Comparator that looks only at the stock part of a {@code compositekey}; two keys
	 * with the same stock compare as equal whatever their time is.
	 */
	public static class DefinedGroupSort extends WritableComparator {

		protected DefinedGroupSort() {
			super(compositekey.class, true);
		}

		/**
		 * @param a first key, must be a {@code compositekey}
		 * @param b second key, must be a {@code compositekey}
		 * @return result of comparing the two stock strings
		 */
		@Override
		public int compare(WritableComparable a, WritableComparable b) {
			compositekey first = (compositekey) a;
			compositekey second = (compositekey) b;

			String firstStock = first.getStock();
			return firstStock.compareTo(second.getStock());
		}

	}

	
	public static class IntSumReducer extends Reducer<compositekey, Text, Text, DoubleWritable> {

		//private int windowsize = 3;
		private double result = 0.0;
		
		public void reduce(compositekey key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
					
			simplemovingaverage smg = new simplemovingaverage();
			
			for (Text values : value) {
				
				//每个value添加到Queue,然后计算移动平均数,直接就返回
				
				smg.addNewNumber(Integer.parseInt(values.toString()));

				result = smg.getMovingAverage();
				context.write(new Text(key.getStock()), new DoubleWritable(result));
			}

		//	context.write(new Text(key.getStock()), new DoubleWritable(result));

		}
	}
	
	
	//实现移动平均的算法,通过Queue来实现
	
	/**
	 * Simple moving average over the most recent {@code period} numbers, backed by a FIFO
	 * queue and a running sum.
	 *
	 * <p>Until {@code period} numbers have been added, the average is taken over however
	 * many numbers are present. The no-argument constructor keeps the original window of 3.
	 */
	public static class simplemovingaverage {

		private static final int DEFAULT_PERIOD = 3;

		private double sum = 0.0;
		private final int period;
		private final Queue<Double> window = new LinkedList<Double>();

		/** Creates a window holding the last 3 numbers. */
		public simplemovingaverage() {
			this(DEFAULT_PERIOD);
		}

		/**
		 * Creates a window holding the last {@code period} numbers.
		 *
		 * @param period maximum number of values kept in the window; must be positive
		 * @throws IllegalArgumentException if {@code period} is zero or negative
		 */
		public simplemovingaverage(int period) {
			if (period <= 0) {
				throw new IllegalArgumentException("period must be positive: " + period);
			}
			this.period = period;
		}

		/**
		 * Adds a number to the window, evicting the oldest one once the window is full.
		 *
		 * @param number value to add
		 */
		public void addNewNumber(double number) {
			sum += number;
			window.add(number);
			if (window.size() > period) {
				// Drop the oldest value and take it out of the running sum as well.
				sum -= window.remove();
			}
		}

		/**
		 * @return the running sum divided by the number of values currently in the window
		 * @throws IllegalArgumentException if no number has been added yet
		 */
		public double getMovingAverage() {
			if (window.isEmpty()) {
				throw new IllegalArgumentException("undefined");
			}
			return sum / window.size();
		}

	}

	
	/**
	 * Configures and runs the moving-average job.
	 *
	 * <p>Exits with status 0 when the job succeeds, 1 when it fails, and 2 when the
	 * input/output paths are missing from the command line.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 * @throws IOException            if the job cannot be created or submitted
	 * @throws ClassNotFoundException if a configured job class cannot be loaded
	 * @throws InterruptedException   if waiting for job completion is interrupted
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

		if (args.length < 2) {
			// Fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
			System.err.println("Usage: movingAverage <input path> <output path>");
			System.exit(2);
		}

		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "movingAverage");
		job.setJarByClass(movingAverage.class);

		// Map side: (stock, time) composite key -> price text.
		job.setMapperClass(TokenizerMapper.class);
		job.setMapOutputKeyClass(compositekey.class);
		job.setMapOutputValueClass(Text.class);

		// Reduce side: stock -> moving average.
		job.setReducerClass(IntSumReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(DoubleWritable.class);

		// Secondary sort: partition and group by stock, sort by stock then time.
		job.setPartitionerClass(twopartitions.class);
		job.setSortComparatorClass(compositekeyComparator.class);
		job.setGroupingComparatorClass(DefinedGroupSort.class);
		job.setNumReduceTasks(1);

		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);

	}

}


结果如下:

dog	19.0
dog	24.0
dog	29.0
dog	39.0
dog	49.0
dog	59.0
dog	72.33333333333333
dog	85.66666666666667
gold	89.0
gold	139.0
gold	189.0
gold	289.0
gold	389.0
gold	489.0
gold	589.0
gold	689.0
gold	789.0
gold	889.0






  • 0
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

tom_fans

谢谢打赏

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值