先列出测试结果，后面附有测试代码。可以发现，改用 Kryo 序列化之后，可以节约大量空间。
测试数据 | JavaSerial（字节） | KryoSerial（字节） |
Int | 81 | 2 |
empty string | 7 | 3 |
string with 1 character | 8 | 4 |
string with 2 character | 9 | 4 |
string with 10 character | 17 | 12 |
string with 20 character | 27 | 22 |
测试代码
import org.apache.spark._
import java.nio.ByteBuffer
import org.apache.spark.serializer._;
/**
 * Compares how large a handful of sample values become when serialized with
 * Spark's `JavaSerializer` versus its `KryoSerializer`.
 *
 * For every sample a label line is printed, followed by the `limit()` of the
 * `ByteBuffer` returned by `SerializerInstance.serialize`; this test uses that
 * number as the serialized size.
 */
object TestDataSerialize {

  /**
   * Entry point: runs the Java-serializer pass first, then the Kryo pass.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    testJavaSerial()
    testKyroSerial()
  }

  /**
   * Serializes each sample value with the given instance and prints the
   * resulting buffer limit, one labelled measurement per sample.
   *
   * @param serialInst serializer instance under measurement
   */
  def testSerial(serialInst: SerializerInstance): Unit = {
    // Named Int so the Int sample and the SerObject constructor receive the
    // same argument types as they always have.
    val seed = 1

    report("test IntObject")(serialInst.serialize(seed))
    report("test empty string")(serialInst.serialize(""))
    report("test string with 1 character")(serialInst.serialize("a"))
    report("test string with 2 character")(serialInst.serialize("ab"))
    report("test string with 10 character")(serialInst.serialize("abcdefghij"))
    report("test string with 20 character")(serialInst.serialize("abcdefghijabcdefghij"))
    report("test SerObject")(serialInst.serialize(new SerObject(seed, "testname", seed + 1)))

    // Visual separator between the two serializer passes.
    println("\n\n")
  }

  /**
   * Runs the measurements with a Kryo serializer instance.
   *
   * The method name (and the label it prints) keeps its original spelling so
   * existing callers and recorded output stay valid.
   */
  def testKyroSerial(): Unit = {
    println("testKyroSerial")
    testSerial(new KryoSerializer(newConf()).newInstance())
  }

  /** Runs the measurements with a Java serializer instance. */
  def testJavaSerial(): Unit = {
    println("testJavaSerial")
    testSerial(new JavaSerializer(newConf()).newInstance())
  }

  /** Builds the Spark configuration shared by both serializer passes. */
  private def newConf(): SparkConf =
    new SparkConf().setAppName("spark util test").setMaster("local[2]")

  /**
   * Prints `label`, then evaluates `payload` and prints its `limit()`.
   *
   * `payload` is by-name so the label is printed before serialization runs,
   * which keeps the output order unchanged even if serialization fails.
   */
  private def report(label: String)(payload: => ByteBuffer): Unit = {
    println(label)
    println(payload.limit())
  }
}