# -*- coding:utf-8 -*-
'''
测试证明BSON和JSON所需要的字节数相差无几,甚至更多。
压缩算法的效果取决于其冗余程度。
'''
from __future__ import division
import collections # From Python standard library.
import datetime
import bz2
import json
import sys

# Sample document used for every size comparison in this script: a few scalar
# fields, one nested dict, and a list of similarly shaped records (which gives
# the compressors some redundancy to work with).
test = {
    "name": "lemo",
    "age": 12,
    "address": {
        "city": "suzhou",
        "country": "china",
        "code": 215000,
    },
    # "timestamp": datetime.datetime.now(),
    "scores": [
        {"name": "english", "grade": 3.0, "code": 215000},
        {"name": "chinese", "grade": 2.0, "code": 215000},
        {"name": "kerea", "grade": 3.0, "code": 232000},
        {"name": "france", "grade": 4.0, "code": 235000},
        {"name": "japanese", "grade": 4.0, "code": 235000},
        {"name": "kerea", "grade": 4.0, "code": 235000},
        {"name": "kerea", "grade": 4.0, "code": 235300},
    ],
}

# BSON comparison, disabled (needs the third-party ``bson`` package):
# import bson
# from bson.codec_options import CodecOptions
# data = bson.BSON.encode(test)
# print ("bson:", len(data) )
# decoded_doc = bson.BSON.decode(data)

# Baseline: plain JSON text. All ratios printed below are relative to its
# length.
json_str = json.dumps(test)
print("json:", len(json_str))

# Encode once; every compressor below is fed the same UTF-8 bytes.
json_bytes = json_str.encode('utf-8')

# --- bz2 ---
compressor = bz2.BZ2Compressor()
# compress() may already return part of the compressed stream, so its result
# must be kept and joined with flush(); dropping it would truncate the output
# whenever the compressor emits data before the flush.
bz2_str = compressor.compress(json_bytes) + compressor.flush()
print("bz2:", len(bz2_str), len(bz2_str) / len(json_str))

# --- lzma ---
# Only attempted on Python 3.3 or newer, where the import below is expected
# to succeed.
if sys.version_info >= (3, 3):
    import lzma

    lzc = lzma.LZMACompressor()
    out1 = lzc.compress(json_bytes)
    out2 = lzc.flush()
    # Concatenate all the partial results:
    result = b"".join([out1, out2])
    print("lzma:", len(result), len(result) / len(json_str))

    # Round-trip sanity check: decompressing must give back the input bytes.
    lzd = lzma.LZMADecompressor()
    dec_str = lzd.decompress(result)
    assert json_bytes == dec_str
### msgpack
import datetime
import msgpack
#useful_dict = {
#"id": 1,
#"created": datetime.datetime.now(),
#}
def decode_datetime(obj):
    """Turn a tagged datetime map back into a ``datetime.datetime``.

    Intended as an ``object_hook`` for ``msgpack.unpackb``. A map is treated
    as an encoded datetime when it contains the ``__datetime__`` marker key;
    its ``as_str`` entry is then parsed with the format
    ``%Y%m%dT%H:%M:%S.%f``. Any other map is returned unchanged.

    Keys and the timestamp value are accepted as either ``str`` or ``bytes``,
    because the unpacker may deliver either depending on its configuration.

    :param obj: a dict produced by the unpacker.
    :returns: a ``datetime.datetime`` for tagged maps, otherwise ``obj``.
    :raises KeyError: if the marker is present but ``as_str`` is missing.
    :raises ValueError: if ``as_str`` does not match the expected format.
    """
    if '__datetime__' in obj or b'__datetime__' in obj:
        # Look the payload up under whichever key type is actually present.
        stamp = obj['as_str'] if 'as_str' in obj else obj[b'as_str']
        if isinstance(stamp, bytes):
            stamp = stamp.decode('ascii')
        obj = datetime.datetime.strptime(stamp, "%Y%m%dT%H:%M:%S.%f")
    return obj
def encode_datetime(obj):
    """Convert a ``datetime.datetime`` into a msgpack-serializable map.

    Intended as the ``default`` hook for ``msgpack.packb``. A datetime is
    replaced by ``{'__datetime__': True, 'as_str': <text>}`` where the text
    uses the format ``%Y%m%dT%H:%M:%S.%f``. The format carries no UTC offset,
    so timezone information is not part of the encoded value.

    :param obj: the value being serialized.
    :returns: the tagged dict for datetimes; any other value is handed back
        unchanged.
    """
    if isinstance(obj, datetime.datetime):
        return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")}
    return obj
# Serialize the sample document with msgpack, passing encode_datetime as the
# ``default`` hook, and report the packed size and its ratio to the JSON
# length.
packed_dict = msgpack.packb(test, default=encode_datetime)
packed_size = len(packed_dict)
print ("msgpack:", packed_size, packed_size / len(json_str))

# Optional experiment, disabled: bz2 on top of the msgpack bytes.
#compressor = bz2.BZ2Compressor(compresslevel=1)
#compressor.compress(packed_dict)
#bz2_str = compressor.flush()
#print ("msgpack_bz2:",len(bz2_str), len(bz2_str)/len(json_str) )

# Unpack again, with decode_datetime supplied as the ``object_hook``.
this_dict_again = msgpack.unpackb(
    packed_dict,
    object_hook=decode_datetime,
)
# Reprinted from: https://my.oschina.net/cppblog/blog/408365