问题
求数据流中出现最频繁的元素。下面用 MG(Misra-Gries)算法近似求解这个问题。
代码
Java
import java.util.*;
public class FrequentElement {
public static void main(String[] args) {
int k = 3;//计数器的个数
Random random = new Random();
Map<Integer, Integer> counters = new HashMap();
int i = 0;//这个用于统计总的数量
while (i < 10){//真实的数据流应该是源源不断的,这里简化为有限个了
Integer element = random.nextInt(5);
if (counters.size()<k){//模拟k个计数器
System.out.println("正在计数");
counters.put(element, counters.getOrDefault(element, 0)+1);
System.out.println(counters);
}else{//k个计数器都用上的时候,进行一轮减1
System.out.println("开始一轮减1");
System.out.println(counters);
//只能用迭代器,不可用foreach,避免ConcurrentModificationException
Iterator<Integer> iterator = counters.keySet().iterator();
while (iterator.hasNext()){
Integer key = iterator.next();
if(counters.get(key)==1) iterator.remove();
else counters.put(key, counters.get(key)-1);
}
System.out.println(counters);
}
i++;
}
int finalSum = 0;
for (Integer key:counters.keySet()) finalSum += counters.get(key);
System.out.println("与精确值最多相差"+(i-finalSum)/(k+1));
}
}
这个有点复杂,看一下模拟的效果
Scala
import scala.util.Random
/** Simulation of the Misra-Gries (MG) frequent-elements summary over a short
  * random stream.
  *
  * The summary keeps at most `k` counters. Every decrement round discards
  * exactly `k + 1` occurrences (one from each of the `k` counters plus the
  * incoming element), so after `n` elements each counter undercounts the true
  * frequency by at most `(n - sumOfCounters) / (k + 1)`.
  */
object FrequentElement2 {

  /** True when `element` can be counted without a decrement round: it is
    * already tracked, or a free counter is still available.
    */
  private def canCount(counters: collection.mutable.Map[Int, Int], element: Int, k: Int): Boolean =
    counters.contains(element) || counters.size < k

  /** Feeds one stream element into the summary, mutating `counters` in place.
    *
    * An element that is already tracked is always incremented, even when all
    * `k` counters are in use; only an untracked element arriving at a full
    * summary triggers a decrement round.
    *
    * @param counters current summary (element -> counter value)
    * @param element  incoming stream element
    * @param k        maximum number of counters
    * @return `true` if the element was counted, `false` if a decrement round
    *         was performed instead
    */
  def offer(counters: collection.mutable.Map[Int, Int], element: Int, k: Int): Boolean =
    if (canCount(counters, element, k)) {
      counters.update(element, counters.getOrElse(element, 0) + 1)
      true
    } else {
      // Iterate over a snapshot so entries can be removed from the live map.
      counters.toList.foreach {
        case (key, count) if count <= 1 => counters.remove(key)
        case (key, count)               => counters.update(key, count - 1)
      }
      false
    }

  def main(args: Array[String]): Unit = {
    val k = 3 // number of counters
    val total = 10 // a real stream is unbounded; a finite one is used here
    val counters = collection.mutable.HashMap[Int, Int]()
    (1 to total).foreach { _ =>
      val element = Random.nextInt(5)
      if (canCount(counters, element, k)) {
        println("正在计数")
      } else {
        println("开始一轮减1")
        println(counters) // state before the round
      }
      offer(counters, element, k)
      println(counters)
    }
    println(counters)
    val finalSum = counters.values.sum
    println("与精确值最多相差" + (total - finalSum) / (k + 1))
  }
}
Python
import random
def offer(counters, element, k):
    """Feed one stream element into a Misra-Gries summary, mutating ``counters``.

    An element that is already tracked is always incremented, even when all
    ``k`` counters are in use; only an untracked element arriving at a full
    summary triggers a decrement round.

    Args:
        counters: dict mapping element -> counter value; modified in place.
        element: the incoming stream element.
        k: maximum number of counters to keep.

    Returns:
        True if the element was counted, False if a decrement round was
        performed instead.
    """
    if element in counters:
        counters[element] += 1
        return True
    if len(counters) < k:
        counters[element] = 1
        return True
    # Decrement round. Iterate over a snapshot of the keys so entries can be
    # deleted, and write through the dict: rebinding the loop variable of
    # ``.items()`` would leave the stored values untouched.
    for key in list(counters):
        if counters[key] <= 1:
            del counters[key]
        else:
            counters[key] -= 1
    return False


k = 3  # number of counters
counters = {}
i = 0  # total number of elements consumed so far
while i < 10:  # a real stream is unbounded; a finite one is used here
    # randrange(5) yields 0..4; randint(0, 5) would also yield 5.
    element = random.randrange(5)
    if offer(counters, element, k):
        print("正在计数")
    else:
        print("开始一轮减1")
    print(counters)
    i += 1
finalSum = sum(counters.values())
# Each decrement round discards k + 1 occurrences (k counters plus the incoming
# element), so every counter undercounts by at most (i - finalSum) // (k + 1).
print("与精确值最多相差%d" % ((i - finalSum) // (k + 1)))