基数预估算法 错误率验证

package hllctest

import java.util

import org.scalatest.{FlatSpec}
import org.spark.sqludf.HLLCounter

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.Random

/**
 * Empirical accuracy check for set-intersection sizes estimated with
 * [[HLLCounter]].
 *
 * For a range of sample sizes and target overlap ratios `r`, two sets of
 * distinct random strings are drawn from a common universe of
 * `sampleSize / r` strings. The exact intersection size is then compared
 * with the estimate obtained by inclusion-exclusion on the counters:
 * `|A ∩ B| ≈ est(A) + est(B) - est(A ∪ B)`.
 *
 * The test makes no assertions; it only prints the measurements.
 */
class HllcCrossSetTest extends FlatSpec {

  /** Shared random source for test-data generation (unseeded, so runs differ). */
  val ramdom = new Random()

  /** Constructor argument for every HLLCounter; presumably the precision (index bit count) -- TODO confirm. */
  val m = 18

  /**
   * Runs [[errorRate]] for one sample size against each overlap ratio.
   *
   * @param sampleCount number of distinct elements drawn into each of the two sets
   */
  def errorRateCal(sampleCount: Int): Unit = {
    Seq(0.01, 0.05, 0.1, 0.2, 0.5).foreach(ratio => errorRate(ratio, sampleCount))
  }


  "hllc test" should "hllc merge,mix error rate" in {
    // The last three sizes are run a second time, exactly as in the original
    // sequence; the repeated pass shows the run-to-run variance of the estimate.
    Seq(
      1000, 2000, 5000, 10000, 20000, 50000,
      100000, 200000, 500000,
      100000, 200000, 500000
    ).foreach(errorRateCal)
  }


  /**
   * Sampling without replacement: keeps drawing random strings until one that
   * is not yet in `setCollection` has been added, so the set grows by exactly
   * one element per call.
   *
   * Implemented as a loop rather than recursion so that long collision streaks
   * cannot overflow the stack. It does not terminate if the set already holds
   * every string [[getTestString]] can produce for `totalIntArray`.
   *
   * @param setCollection set to extend by one new element
   * @param totalIntArray decimal digits of the universe size
   */
  def getRandomStr(setCollection: mutable.HashSet[String], totalIntArray: Array[Int]): Unit = {
    // `add` returns false when the element was already present -> draw again.
    while (!setCollection.add(getTestString(totalIntArray))) {}
  }


  /**
   * Builds two random sets of `testSetLength` distinct strings from a universe
   * of `testSetLength / r` strings, then prints the exact and the estimated
   * intersection size, their ratio to `setA.size`, and the elapsed times.
   *
   * @param r             expected overlap ratio (sample size / universe size)
   * @param testSetLength number of distinct elements per set
   */
  def errorRate(r: Double, testSetLength: Int): Unit = {
    println(s" ********************begin r :{$r},testSetLength:{$testSetLength} ,m:{$m} ***********************")
    val setA = new mutable.HashSet[String]()
    val setB = new mutable.HashSet[String]()
    val totalCount = (testSetLength / r).toInt

    val generateStart = System.currentTimeMillis()
    // Decimal digits of the universe size, most significant first.
    val totalIntArray = totalCount.toString.toCharArray.map(_.asDigit)
    (0 until testSetLength).foreach { _ =>
      getRandomStr(setA, totalIntArray)
      getRandomStr(setB, totalIntArray)
    }

    println(s"tatolCount: ${totalCount} ,r : ${r} setA size: ${setA.size} , setB size: ${setB.size} ")
    println(s" generate data cost time: ${System.currentTimeMillis() - generateStart} ")

    // Exact intersection, computed with ordinary hash sets.
    val exactStart = System.currentTimeMillis()
    val realMixCnt = realMix(setA, setB)
    val mixRate = realMixRate(realMixCnt, setA)
    println(s" realMixCnt: ${realMixCnt} , mixRate:${mixRate}")
    println(s" Map collection cost time: ${System.currentTimeMillis() - exactStart} ")

    // Estimated intersection, computed from HLL counters.
    val hllcStart = System.currentTimeMillis()
    val hllcMixCnt = hllcMix(setA, setB)
    val mixRatehllc = realMixRate(hllcMixCnt, setA)
    println(s" hllcMixCnt: ${hllcMixCnt} , mixRatehllc:${mixRatehllc}")
    val distinct = mixRatehllc - mixRate
    println(f" mixRatehllc - mixRate: $distinct%1.6f ")
    println(s" hllcMixCnt - realMixCnt: ${hllcMixCnt - realMixCnt} ")
    println(s" hllc cost time: ${System.currentTimeMillis() - hllcStart} ")
  }


  /**
   * Exact size of the intersection of the two sets.
   *
   * @return number of elements of `setB` that are also in `setA`
   */
  def realMix(setA: mutable.HashSet[String], setB: mutable.HashSet[String]): Int =
    setB.count(setA.contains)


  /** Intersection size as a fraction of `set.size` (exact-count overload). */
  def realMixRate(mixCount: Int, set: mutable.HashSet[String]): Double =
    mixCount.toDouble / set.size


  /** Intersection size as a fraction of `set.size` (estimated-count overload). */
  def realMixRate(mixCount: Long, set: mutable.HashSet[String]): Double =
    mixCount.toDouble / set.size


  /**
   * Estimated intersection size via inclusion-exclusion on HLL estimates:
   * `est(A) + est(B) - est(A ∪ B)`.
   *
   * The second counter is filled from `setB`. It was previously filled from
   * `setA` again, which silently replaced est(B) with est(A); that went
   * unnoticed only because this test always uses two sets of equal size.
   * The result can be negative when the estimation error exceeds the true overlap.
   */
  def hllcMix(setA: mutable.HashSet[String], setB: mutable.HashSet[String]): Long = {
    val counterA = new HLLCounter(m)
    setA.foreach(item => counterA.add(item))

    val counterB = new HLLCounter(m)
    setB.foreach(item => counterB.add(item))

    counterA.getCountEstimate + counterB.getCountEstimate - hllcMerge(setA, setB)
  }


  /** Estimated size of the union: every element of both sets is added to a single counter. */
  def hllcMerge(setA: mutable.HashSet[String], setB: mutable.HashSet[String]) = {
    val unionCounter = new HLLCounter(m)
    setA.foreach(item => unionCounter.add(item))
    setB.foreach(item => unionCounter.add(item))
    unionCounter.getCountEstimate
  }

  /** Exact number of distinct elements across both buffers. */
  def realMerge(setA: ArrayBuffer[String], setB: ArrayBuffer[String]): Int = {
    val union = new util.HashSet[String]
    setA.foreach(str => union.add(str))
    setB.foreach(str => union.add(str))
    union.size()
  }


  /**
   * Draws one random element from a universe of exactly `N` strings, where `N`
   * is the number whose decimal digits are `totalCountArray`.
   *
   * A uniform index in `[0, N)` is zero-padded to the digit count of `N` and
   * each digit `d` is written as the letter `'a' + d`, so results have one
   * letter from `a`..`j` per digit.
   *
   * The previous implementation picked every character independently from a
   * range given by the corresponding digit, which made the universe size the
   * product of the digits (with 0 counted as 10) instead of `N` -- e.g. 10000
   * strings for N = 25000 -- and therefore distorted the overlap ratio whenever
   * `N` had more than one non-zero digit.
   *
   * @param totalCountArray decimal digits of the universe size, most significant first
   * @return a random string identifying one element; empty for an empty digit array
   */
  def getTestString(totalCountArray: Array[Int]): String = {
    if (totalCountArray.isEmpty) ""
    else {
      val universeSize = totalCountArray.foldLeft(0L)((acc, digit) => acc * 10 + digit)
      // A non-positive size degenerates to a one-element universe (index 0).
      val index =
        if (universeSize <= 0) 0L
        else math.min((ramdom.nextDouble() * universeSize).toLong, universeSize - 1)
      val digits = index.toString
      val padded = ("0" * (totalCountArray.length - digits.length)) + digits
      padded.map(ch => (ch - '0' + 'a').toChar)
    }
  }


  /**
   * Random string of `length` letters, each drawn from `a`..`j`
   * (10 to the power `length` possible values).
   */
  def getRamdomString(length: Int): String = {
    val sb = new StringBuilder()
    (0 until length).foreach(_ => sb.append((ramdom.nextInt(10) + 97).toChar))
    sb.toString
  }


  /** Single random letter out of the first `l` lowercase letters (`a` is code 97). */
  def getRamdomStringS(l: Int): String =
    (ramdom.nextInt(l) + 97).toChar.toString


}


********************begin r :{0.01},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 100000 ,r : 0.01 setA size: 1000 , setB size: 1000
generate data cost time: 48
realMixCnt: 12 , mixRate:0.012
Map collection cost time: 6
hllcMixCnt: 8 , mixRatehllc:0.008
mixRatehllc - mixRate: -0.004000
hllcMixCnt - realMixCnt: -4
hllc cost time: 131
********************begin r :{0.05},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 20000 ,r : 0.05 setA size: 1000 , setB size: 1000
generate data cost time: 4
realMixCnt: 49 , mixRate:0.049
Map collection cost time: 3
hllcMixCnt: 48 , mixRatehllc:0.048
mixRatehllc - mixRate: -0.001000
hllcMixCnt - realMixCnt: -1
hllc cost time: 19
********************begin r :{0.1},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 10000 ,r : 0.1 setA size: 1000 , setB size: 1000
generate data cost time: 3
realMixCnt: 108 , mixRate:0.108
Map collection cost time: 5
hllcMixCnt: 107 , mixRatehllc:0.107
mixRatehllc - mixRate: -0.001000
hllcMixCnt - realMixCnt: -1
hllc cost time: 15
********************begin r :{0.2},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 5000 ,r : 0.2 setA size: 1000 , setB size: 1000
generate data cost time: 3
realMixCnt: 196 , mixRate:0.196
Map collection cost time: 1
hllcMixCnt: 195 , mixRatehllc:0.195
mixRatehllc - mixRate: -0.001000
hllcMixCnt - realMixCnt: -1
hllc cost time: 16
********************begin r :{0.5},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 2000 ,r : 0.5 setA size: 1000 , setB size: 1000
generate data cost time: 7
realMixCnt: 489 , mixRate:0.489
Map collection cost time: 1
hllcMixCnt: 490 , mixRatehllc:0.49
mixRatehllc - mixRate: 0.001000
hllcMixCnt - realMixCnt: 1
hllc cost time: 8
********************begin r :{0.01},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 200000 ,r : 0.01 setA size: 2000 , setB size: 2000
generate data cost time: 6
realMixCnt: 11 , mixRate:0.0055
Map collection cost time: 0
hllcMixCnt: 19 , mixRatehllc:0.0095
mixRatehllc - mixRate: 0.004000
hllcMixCnt - realMixCnt: 8
hllc cost time: 24
********************begin r :{0.05},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 40000 ,r : 0.05 setA size: 2000 , setB size: 2000
generate data cost time: 5
realMixCnt: 102 , mixRate:0.051
Map collection cost time: 1
hllcMixCnt: 110 , mixRatehllc:0.055
mixRatehllc - mixRate: 0.004000
hllcMixCnt - realMixCnt: 8
hllc cost time: 11
********************begin r :{0.1},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 20000 ,r : 0.1 setA size: 2000 , setB size: 2000
generate data cost time: 4
realMixCnt: 192 , mixRate:0.096
Map collection cost time: 0
hllcMixCnt: 192 , mixRatehllc:0.096
mixRatehllc - mixRate: 0.000000
hllcMixCnt - realMixCnt: 0
hllc cost time: 11
********************begin r :{0.2},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 10000 ,r : 0.2 setA size: 2000 , setB size: 2000
generate data cost time: 3
realMixCnt: 395 , mixRate:0.1975
Map collection cost time: 1
hllcMixCnt: 387 , mixRatehllc:0.1935
mixRatehllc - mixRate: -0.004000
hllcMixCnt - realMixCnt: -8
hllc cost time: 12
********************begin r :{0.5},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 4000 ,r : 0.5 setA size: 2000 , setB size: 2000
generate data cost time: 6
realMixCnt: 986 , mixRate:0.493
Map collection cost time: 1
hllcMixCnt: 981 , mixRatehllc:0.4905
mixRatehllc - mixRate: -0.002500
hllcMixCnt - realMixCnt: -5
hllc cost time: 16
********************begin r :{0.01},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 500000 ,r : 0.01 setA size: 5000 , setB size: 5000
generate data cost time: 17
realMixCnt: 48 , mixRate:0.0096
Map collection cost time: 2
hllcMixCnt: 41 , mixRatehllc:0.0082
mixRatehllc - mixRate: -0.001400
hllcMixCnt - realMixCnt: -7
hllc cost time: 13
********************begin r :{0.05},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 100000 ,r : 0.05 setA size: 5000 , setB size: 5000
generate data cost time: 7
realMixCnt: 263 , mixRate:0.0526
Map collection cost time: 1
hllcMixCnt: 271 , mixRatehllc:0.0542
mixRatehllc - mixRate: 0.001600
hllcMixCnt - realMixCnt: 8
hllc cost time: 13
********************begin r :{0.1},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 50000 ,r : 0.1 setA size: 5000 , setB size: 5000
generate data cost time: 6
realMixCnt: 527 , mixRate:0.1054
Map collection cost time: 0
hllcMixCnt: 526 , mixRatehllc:0.1052
mixRatehllc - mixRate: -0.000200
hllcMixCnt - realMixCnt: -1
hllc cost time: 26
********************begin r :{0.2},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 25000 ,r : 0.2 setA size: 5000 , setB size: 5000
generate data cost time: 16
realMixCnt: 2505 , mixRate:0.501
Map collection cost time: 4
hllcMixCnt: 2496 , mixRatehllc:0.4992
mixRatehllc - mixRate: -0.001800
hllcMixCnt - realMixCnt: -9
hllc cost time: 16
********************begin r :{0.5},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 10000 ,r : 0.5 setA size: 5000 , setB size: 5000
generate data cost time: 8
realMixCnt: 2499 , mixRate:0.4998
Map collection cost time: 1
hllcMixCnt: 2505 , mixRatehllc:0.501
mixRatehllc - mixRate: 0.001200
hllcMixCnt - realMixCnt: 6
hllc cost time: 14
********************begin r :{0.01},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 1000000 ,r : 0.01 setA size: 10000 , setB size: 10000
generate data cost time: 15
realMixCnt: 103 , mixRate:0.0103
Map collection cost time: 2
hllcMixCnt: 62 , mixRatehllc:0.0062
mixRatehllc - mixRate: -0.004100
hllcMixCnt - realMixCnt: -41
hllc cost time: 24
********************begin r :{0.05},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 200000 ,r : 0.05 setA size: 10000 , setB size: 10000
generate data cost time: 18
realMixCnt: 484 , mixRate:0.0484
Map collection cost time: 2
hllcMixCnt: 467 , mixRatehllc:0.0467
mixRatehllc - mixRate: -0.001700
hllcMixCnt - realMixCnt: -17
hllc cost time: 18
********************begin r :{0.1},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 100000 ,r : 0.1 setA size: 10000 , setB size: 10000
generate data cost time: 11
realMixCnt: 938 , mixRate:0.0938
Map collection cost time: 1
hllcMixCnt: 967 , mixRatehllc:0.0967
mixRatehllc - mixRate: 0.002900
hllcMixCnt - realMixCnt: 29
hllc cost time: 12
********************begin r :{0.2},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 50000 ,r : 0.2 setA size: 10000 , setB size: 10000
generate data cost time: 10
realMixCnt: 1997 , mixRate:0.1997
Map collection cost time: 5
hllcMixCnt: 1999 , mixRatehllc:0.1999
mixRatehllc - mixRate: 0.000200
hllcMixCnt - realMixCnt: 2
hllc cost time: 15
********************begin r :{0.5},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 20000 ,r : 0.5 setA size: 10000 , setB size: 10000
generate data cost time: 18
realMixCnt: 5010 , mixRate:0.501
Map collection cost time: 4
hllcMixCnt: 4990 , mixRatehllc:0.499
mixRatehllc - mixRate: -0.002000
hllcMixCnt - realMixCnt: -20
hllc cost time: 23
********************begin r :{0.01},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 2000000 ,r : 0.01 setA size: 20000 , setB size: 20000
generate data cost time: 41
realMixCnt: 218 , mixRate:0.0109
Map collection cost time: 4
hllcMixCnt: 134 , mixRatehllc:0.0067
mixRatehllc - mixRate: -0.004200
hllcMixCnt - realMixCnt: -84
hllc cost time: 36
********************begin r :{0.05},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 400000 ,r : 0.05 setA size: 20000 , setB size: 20000
generate data cost time: 19
realMixCnt: 946 , mixRate:0.0473
Map collection cost time: 3
hllcMixCnt: 949 , mixRatehllc:0.04745
mixRatehllc - mixRate: 0.000150
hllcMixCnt - realMixCnt: 3
hllc cost time: 23
********************begin r :{0.1},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 200000 ,r : 0.1 setA size: 20000 , setB size: 20000
generate data cost time: 26
realMixCnt: 2001 , mixRate:0.10005
Map collection cost time: 10
hllcMixCnt: 2080 , mixRatehllc:0.104
mixRatehllc - mixRate: 0.003950
hllcMixCnt - realMixCnt: 79
hllc cost time: 56
********************begin r :{0.2},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 100000 ,r : 0.2 setA size: 20000 , setB size: 20000
generate data cost time: 28
realMixCnt: 4034 , mixRate:0.2017
Map collection cost time: 7
hllcMixCnt: 4113 , mixRatehllc:0.20565
mixRatehllc - mixRate: 0.003950
hllcMixCnt - realMixCnt: 79
hllc cost time: 25
********************begin r :{0.5},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 40000 ,r : 0.5 setA size: 20000 , setB size: 20000
generate data cost time: 24
realMixCnt: 9975 , mixRate:0.49875
Map collection cost time: 8
hllcMixCnt: 9994 , mixRatehllc:0.4997
mixRatehllc - mixRate: 0.000950
hllcMixCnt - realMixCnt: 19
hllc cost time: 23
********************begin r :{0.01},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 5000000 ,r : 0.01 setA size: 50000 , setB size: 50000
generate data cost time: 88
realMixCnt: 468 , mixRate:0.00936
Map collection cost time: 48
hllcMixCnt: 603 , mixRatehllc:0.01206
mixRatehllc - mixRate: 0.002700
hllcMixCnt - realMixCnt: 135
hllc cost time: 163
********************begin r :{0.05},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 1000000 ,r : 0.05 setA size: 50000 , setB size: 50000
generate data cost time: 99
realMixCnt: 2381 , mixRate:0.04762
Map collection cost time: 17
hllcMixCnt: 2335 , mixRatehllc:0.0467
mixRatehllc - mixRate: -0.000920
hllcMixCnt - realMixCnt: -46
hllc cost time: 59
********************begin r :{0.1},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 500000 ,r : 0.1 setA size: 50000 , setB size: 50000
generate data cost time: 52
realMixCnt: 5091 , mixRate:0.10182
Map collection cost time: 19
hllcMixCnt: 5116 , mixRatehllc:0.10232
mixRatehllc - mixRate: 0.000500
hllcMixCnt - realMixCnt: 25
hllc cost time: 88
********************begin r :{0.2},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 250000 ,r : 0.2 setA size: 50000 , setB size: 50000
generate data cost time: 72
realMixCnt: 24889 , mixRate:0.49778
Map collection cost time: 18
hllcMixCnt: 25002 , mixRatehllc:0.50004
mixRatehllc - mixRate: 0.002260
hllcMixCnt - realMixCnt: 113
hllc cost time: 61
********************begin r :{0.5},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 100000 ,r : 0.5 setA size: 50000 , setB size: 50000
generate data cost time: 86
realMixCnt: 25140 , mixRate:0.5028
Map collection cost time: 15
hllcMixCnt: 25190 , mixRatehllc:0.5038
mixRatehllc - mixRate: 0.001000
hllcMixCnt - realMixCnt: 50
hllc cost time: 71
********************begin r :{0.01},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 10000000 ,r : 0.01 setA size: 100000 , setB size: 100000
generate data cost time: 154
realMixCnt: 1051 , mixRate:0.01051
Map collection cost time: 29
hllcMixCnt: 811 , mixRatehllc:0.00811
mixRatehllc - mixRate: -0.002400
hllcMixCnt - realMixCnt: -240
hllc cost time: 232
********************begin r :{0.05},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 2000000 ,r : 0.05 setA size: 100000 , setB size: 100000
generate data cost time: 171
realMixCnt: 4903 , mixRate:0.04903
Map collection cost time: 19
hllcMixCnt: 5095 , mixRatehllc:0.05095
mixRatehllc - mixRate: 0.001920
hllcMixCnt - realMixCnt: 192
hllc cost time: 122
********************begin r :{0.1},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 1000000 ,r : 0.1 setA size: 100000 , setB size: 100000
generate data cost time: 131
realMixCnt: 9931 , mixRate:0.09931
Map collection cost time: 42
hllcMixCnt: 10136 , mixRatehllc:0.10136
mixRatehllc - mixRate: 0.002050
hllcMixCnt - realMixCnt: 205
hllc cost time: 155
********************begin r :{0.2},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 500000 ,r : 0.2 setA size: 100000 , setB size: 100000
generate data cost time: 117
realMixCnt: 20148 , mixRate:0.20148
Map collection cost time: 35
hllcMixCnt: 20414 , mixRatehllc:0.20414
mixRatehllc - mixRate: 0.002660
hllcMixCnt - realMixCnt: 266
hllc cost time: 111
********************begin r :{0.5},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 200000 ,r : 0.5 setA size: 100000 , setB size: 100000
generate data cost time: 130
realMixCnt: 49964 , mixRate:0.49964
Map collection cost time: 35
hllcMixCnt: 50268 , mixRatehllc:0.50268
mixRatehllc - mixRate: 0.003040
hllcMixCnt - realMixCnt: 304
hllc cost time: 133
********************begin r :{0.01},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 20000000 ,r : 0.01 setA size: 200000 , setB size: 200000
generate data cost time: 260
realMixCnt: 2035 , mixRate:0.010175
Map collection cost time: 83
hllcMixCnt: 1247 , mixRatehllc:0.006235
mixRatehllc - mixRate: -0.003940
hllcMixCnt - realMixCnt: -788
hllc cost time: 389
********************begin r :{0.05},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 4000000 ,r : 0.05 setA size: 200000 , setB size: 200000
generate data cost time: 311
realMixCnt: 10159 , mixRate:0.050795
Map collection cost time: 94
hllcMixCnt: 10030 , mixRatehllc:0.05015
mixRatehllc - mixRate: -0.000645
hllcMixCnt - realMixCnt: -129
hllc cost time: 308
********************begin r :{0.1},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 2000000 ,r : 0.1 setA size: 200000 , setB size: 200000
generate data cost time: 255
realMixCnt: 20009 , mixRate:0.100045
Map collection cost time: 133
hllcMixCnt: 19539 , mixRatehllc:0.097695
mixRatehllc - mixRate: -0.002350
hllcMixCnt - realMixCnt: -470
hllc cost time: 235
********************begin r :{0.2},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 1000000 ,r : 0.2 setA size: 200000 , setB size: 200000
generate data cost time: 229
realMixCnt: 39946 , mixRate:0.19973
Map collection cost time: 92
hllcMixCnt: 41310 , mixRatehllc:0.20655
mixRatehllc - mixRate: 0.006820
hllcMixCnt - realMixCnt: 1364
hllc cost time: 271
********************begin r :{0.5},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 400000 ,r : 0.5 setA size: 200000 , setB size: 200000
generate data cost time: 357
realMixCnt: 100095 , mixRate:0.500475
Map collection cost time: 93
hllcMixCnt: 100242 , mixRatehllc:0.50121
mixRatehllc - mixRate: 0.000735
hllcMixCnt - realMixCnt: 147
hllc cost time: 422
********************begin r :{0.01},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 50000000 ,r : 0.01 setA size: 500000 , setB size: 500000
generate data cost time: 758
realMixCnt: 5084 , mixRate:0.010168
Map collection cost time: 211
hllcMixCnt: 2978 , mixRatehllc:0.005956
mixRatehllc - mixRate: -0.004212
hllcMixCnt - realMixCnt: -2106
hllc cost time: 844
********************begin r :{0.05},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 10000000 ,r : 0.05 setA size: 500000 , setB size: 500000
generate data cost time: 721
realMixCnt: 25296 , mixRate:0.050592
Map collection cost time: 222
hllcMixCnt: 23440 , mixRatehllc:0.04688
mixRatehllc - mixRate: -0.003712
hllcMixCnt - realMixCnt: -1856
hllc cost time: 699
********************begin r :{0.1},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 5000000 ,r : 0.1 setA size: 500000 , setB size: 500000
generate data cost time: 688
realMixCnt: 50178 , mixRate:0.100356
Map collection cost time: 200
hllcMixCnt: 45070 , mixRatehllc:0.09014
mixRatehllc - mixRate: -0.010216
hllcMixCnt - realMixCnt: -5108
hllc cost time: 701
********************begin r :{0.2},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 2500000 ,r : 0.2 setA size: 500000 , setB size: 500000
generate data cost time: 897
realMixCnt: 249899 , mixRate:0.499798
Map collection cost time: 223
hllcMixCnt: 250263 , mixRatehllc:0.500526
mixRatehllc - mixRate: 0.000728
hllcMixCnt - realMixCnt: 364
hllc cost time: 658
********************begin r :{0.5},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 1000000 ,r : 0.5 setA size: 500000 , setB size: 500000
generate data cost time: 868
realMixCnt: 249895 , mixRate:0.49979
Map collection cost time: 245
hllcMixCnt: 249916 , mixRatehllc:0.499832
mixRatehllc - mixRate: 0.000042
hllcMixCnt - realMixCnt: 21
hllc cost time: 724
********************begin r :{0.01},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 10000000 ,r : 0.01 setA size: 100000 , setB size: 100000
generate data cost time: 110
realMixCnt: 1026 , mixRate:0.01026
Map collection cost time: 28
hllcMixCnt: 569 , mixRatehllc:0.00569
mixRatehllc - mixRate: -0.004570
hllcMixCnt - realMixCnt: -457
hllc cost time: 95
********************begin r :{0.05},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 2000000 ,r : 0.05 setA size: 100000 , setB size: 100000
generate data cost time: 91
realMixCnt: 5024 , mixRate:0.05024
Map collection cost time: 26
hllcMixCnt: 5439 , mixRatehllc:0.05439
mixRatehllc - mixRate: 0.004150
hllcMixCnt - realMixCnt: 415
hllc cost time: 131
********************begin r :{0.1},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 1000000 ,r : 0.1 setA size: 100000 , setB size: 100000
generate data cost time: 93
realMixCnt: 9925 , mixRate:0.09925
Map collection cost time: 28
hllcMixCnt: 10201 , mixRatehllc:0.10201
mixRatehllc - mixRate: 0.002760
hllcMixCnt - realMixCnt: 276
hllc cost time: 141
********************begin r :{0.2},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 500000 ,r : 0.2 setA size: 100000 , setB size: 100000
generate data cost time: 90
realMixCnt: 19983 , mixRate:0.19983
Map collection cost time: 32
hllcMixCnt: 19936 , mixRatehllc:0.19936
mixRatehllc - mixRate: -0.000470
hllcMixCnt - realMixCnt: -47
hllc cost time: 128
********************begin r :{0.5},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 200000 ,r : 0.5 setA size: 100000 , setB size: 100000
generate data cost time: 121
realMixCnt: 50027 , mixRate:0.50027
Map collection cost time: 35
hllcMixCnt: 49726 , mixRatehllc:0.49726
mixRatehllc - mixRate: -0.003010
hllcMixCnt - realMixCnt: -301
hllc cost time: 137
********************begin r :{0.01},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 20000000 ,r : 0.01 setA size: 200000 , setB size: 200000
generate data cost time: 247
realMixCnt: 1991 , mixRate:0.009955
Map collection cost time: 38
hllcMixCnt: 2118 , mixRatehllc:0.01059
mixRatehllc - mixRate: 0.000635
hllcMixCnt - realMixCnt: 127
hllc cost time: 200
********************begin r :{0.05},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 4000000 ,r : 0.05 setA size: 200000 , setB size: 200000
generate data cost time: 225
realMixCnt: 10000 , mixRate:0.05
Map collection cost time: 71
hllcMixCnt: 9751 , mixRatehllc:0.048755
mixRatehllc - mixRate: -0.001245
hllcMixCnt - realMixCnt: -249
hllc cost time: 273
********************begin r :{0.1},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 2000000 ,r : 0.1 setA size: 200000 , setB size: 200000
generate data cost time: 224
realMixCnt: 19974 , mixRate:0.09987
Map collection cost time: 71
hllcMixCnt: 19810 , mixRatehllc:0.09905
mixRatehllc - mixRate: -0.000820
hllcMixCnt - realMixCnt: -164
hllc cost time: 300
********************begin r :{0.2},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 1000000 ,r : 0.2 setA size: 200000 , setB size: 200000
generate data cost time: 243
realMixCnt: 40093 , mixRate:0.200465
Map collection cost time: 82
hllcMixCnt: 40549 , mixRatehllc:0.202745
mixRatehllc - mixRate: 0.002280
hllcMixCnt - realMixCnt: 456
hllc cost time: 297
********************begin r :{0.5},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 400000 ,r : 0.5 setA size: 200000 , setB size: 200000
generate data cost time: 283
realMixCnt: 99874 , mixRate:0.49937
Map collection cost time: 88
hllcMixCnt: 99730 , mixRatehllc:0.49865
mixRatehllc - mixRate: -0.000720
hllcMixCnt - realMixCnt: -144
hllc cost time: 306
********************begin r :{0.01},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 50000000 ,r : 0.01 setA size: 500000 , setB size: 500000
generate data cost time: 678
realMixCnt: 5148 , mixRate:0.010296
Map collection cost time: 181
hllcMixCnt: 3895 , mixRatehllc:0.00779
mixRatehllc - mixRate: -0.002506
hllcMixCnt - realMixCnt: -1253
hllc cost time: 673
********************begin r :{0.05},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 10000000 ,r : 0.05 setA size: 500000 , setB size: 500000
generate data cost time: 820
realMixCnt: 25131 , mixRate:0.050262
Map collection cost time: 185
hllcMixCnt: 24850 , mixRatehllc:0.0497
mixRatehllc - mixRate: -0.000562
hllcMixCnt - realMixCnt: -281
hllc cost time: 647
********************begin r :{0.1},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 5000000 ,r : 0.1 setA size: 500000 , setB size: 500000
generate data cost time: 691
realMixCnt: 49911 , mixRate:0.099822
Map collection cost time: 187
hllcMixCnt: 50951 , mixRatehllc:0.101902
mixRatehllc - mixRate: 0.002080
hllcMixCnt - realMixCnt: 1040
hllc cost time: 690
********************begin r :{0.2},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 2500000 ,r : 0.2 setA size: 500000 , setB size: 500000
generate data cost time: 888
realMixCnt: 250250 , mixRate:0.5005
Map collection cost time: 212
hllcMixCnt: 249358 , mixRatehllc:0.498716
mixRatehllc - mixRate: -0.001784
hllcMixCnt - realMixCnt: -892
hllc cost time: 608
********************begin r :{0.5},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 1000000 ,r : 0.5 setA size: 500000 , setB size: 500000
generate data cost time: 840
realMixCnt: 249691 , mixRate:0.499382
Map collection cost time: 230
hllcMixCnt: 249833 , mixRatehllc:0.499666
mixRatehllc - mixRate: 0.000284
hllcMixCnt - realMixCnt: 142
hllc cost time: 714

Process finished with exit code 0
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值