python中对称差_如何求两个字典的对称差

这是一个关于Python字典对称差(symmetric difference)的性能测试代码,比较了不同实现方法的速度。代码中定义了多个函数来计算两个字典的对称差,并使用timeit模块进行速度测试。测试在Python2.6和Python3上运行,针对不同共享键比例的字典对进行操作。
摘要由CSDN通过智能技术生成

下面是一些对各种算法进行timeit速度测试的代码。

测试使用大小相等的成对dict。键是短的随机字母串,两个dict之间共享键的比例各不相同。dict是从打乱顺序的列表构造的,因此即使它们包含许多共享键,这两个dict的底层哈希表结构也应该大不相同。

共享键的确切数量是随机的,共享键的比例由make_dicts的shared参数控制。

这段代码的主体可以运行在Python2.6+和Python3上。我在这台机器上安装了Python2.6.6和Python3.6.0(这是一台32位的单核机器,2GB RAM,运行在旧的Debian Linux派生版本上)。一些字典对称差函数使用了字典推导式,这在Python2.6中不可用,因此我无法在Python2上测试这些函数。另外,elmex_dsd_py2不能在Python3上运行,所以我将其注释掉了。我最初也打算发布Python2.6的结果,但是我不得不减少输出以适应消息大小的限制。

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

''' Dictionary symmetric difference

Speed tests of various implementations

See http://stackoverflow.com/q/42650081/4014959

Speed test code by PM 2Ring 2017.03.08

'''

from __future__ import print_function

from itertools import product

from random import random, seed, shuffle

from string import ascii_letters

from timeit import Timer

seed(163)

# The dict symmetric difference functions ------------------------------

def inbar_dsd_long(a, b):
    ''' Return the symmetric difference of dicts `a` and `b`.

    The result is a new dict holding every key that occurs in exactly
    one of the two input dicts, mapped to the value from the dict that
    contains it. Neither input is modified. This is the explicit,
    step-by-step variant of the algorithm.
    '''
    # First make sets of the dictionary keys
    keys_in_a = set(a.keys())
    keys_in_b = set(b.keys())

    # Keys that occur in only one of the two dicts
    unique_keys = keys_in_a.symmetric_difference(keys_in_b)

    # Start with an empty dictionary and fill it key by key
    c = {}
    for key in unique_keys:
        if key in a:
            # The key comes from `a`, so take the value from there
            c[key] = a[key]
        else:
            # Otherwise the key must be in `b`
            c[key] = b[key]
    return c

def pm2r_dsd_py2(a, b):
    ''' Return the symmetric difference of dicts `a` and `b`.

    Uses `dict()` on a generator expression rather than a dict
    comprehension, so it avoids syntax that Python 2.6 lacks.
    '''
    return dict((k, a[k] if k in a else b[k])
        for k in set(a.keys()) ^ set(b.keys()))

#def elmex_dsd_py2(a, b):

#symm_diff = set(a) ^ set(b)

#return dict((k, v) for k, v in a.items() + b.items() if k in symm_diff)

def raymond_dsd(a, b):
    ''' Return the symmetric difference of dicts `a` and `b`.

    Builds the union of both dicts in a copy of `a`, then deletes every
    key the two inputs have in common. The inputs are not modified.
    '''
    c = a.copy()
    c.update(b)
    # `&` on the key views yields the set of keys shared by both dicts
    for k in (a.keys() & b.keys()):
        del c[k]
    return c

def inbar_dsd_short(a, b):
    ''' Return the symmetric difference of dicts `a` and `b`.

    Compact dict-comprehension form of `inbar_dsd_long`: each key found
    in exactly one dict is mapped to the value from that dict.
    '''
    return {k: a[k] if k in a else b[k] for k in
        set(a.keys()).symmetric_difference(b.keys())}

def pm2r_dsd_py3(a, b):
    ''' Return the symmetric difference of dicts `a` and `b`.

    Applies `^` directly to the key views, so no intermediate `set`
    objects are built explicitly.
    '''
    return {k: a[k] if k in a else b[k] for k in a.keys() ^ b.keys()}

def evkounis_dsd(a, b):
    ''' Return the symmetric difference of dicts `a` and `b`.

    Filters each dict against the other: first the items of `a` whose
    key is missing from `b`, then the items of `b` whose key is missing
    from `a`.
    '''
    res = {k:v for k, v in a.items() if k not in b}
    res.update({k:v for k, v in b.items() if k not in a})
    return res

def elmex_dsd_py3(a, b):
    ''' Return the symmetric difference of dicts `a` and `b`.

    Computes the set of keys found in exactly one dict, then scans the
    concatenated item lists of both dicts and keeps the items whose key
    is in that set.
    '''
    symm_diff = set(a) ^ set(b)
    return {k: v for k, v in list(a.items()) + list(b.items())
        if k in symm_diff}

# The implementations that get verified and timed.
# elmex_dsd_py2 stays disabled here, matching its commented-out definition.
funcs = (
    inbar_dsd_long,
    pm2r_dsd_py2,
    #elmex_dsd_py2,
    raymond_dsd,
    inbar_dsd_short,
    pm2r_dsd_py3,
    evkounis_dsd,
    elmex_dsd_py3,
)

# ----------------------------------------------------------------------

# Pool of key strings: every 3-character string over the ASCII letters,
# put into random order
all_keys = list(map(''.join, product(ascii_letters, repeat=3)))
shuffle(all_keys)

def make_dicts(size, shared):
    ''' Make a pair of dicts of length `size`, with random key strings.

    `shared` is a real number 0 <= shared <= 1 giving the approximate
    ratio of shared keys.

    Returns a tuple `(a, b, shared_count)`, where `a` and `b` are the
    two dicts and `shared_count` is the number of keys they have in
    common. The values are the integers 0 .. size-1.

    Keys are drawn in order from the global `all_keys` list; each entry
    consumes one key when it is shared and two keys otherwise.
    '''
    a, b = [], []
    keys = iter(all_keys)
    shared_count = 0
    for i in range(size):
        ka = next(keys)
        if random() < shared:
            # Shared entry: both dicts get the same key
            kb = ka
            shared_count += 1
        else:
            kb = next(keys)
        a.append((ka, i))
        b.append((kb, i))

    # Shuffle the item lists independently before building the dicts
    shuffle(a)
    shuffle(b)
    return dict(a), dict(b), shared_count

def verify(a, b):
    ''' Verify that all functions return the same result

    Calls every function in `funcs` on the dicts `a` and `b`, and
    prints True if all of the results equal the last function's
    result, otherwise False.
    '''
    results = [func(a, b) for func in funcs]
    last = results[-1]
    print(all(last == u for u in results[:-1]))

def time_test(loops, reps):
    ''' Print timing stats for all the functions

    Each function in `funcs` is timed with `timeit.Timer`: `reps`
    repetitions of `loops` calls each. The output is one line per
    function, ordered from fastest to slowest, showing the function
    name and its sorted list of timings.

    The Timer setup statement imports `a`, `b` and the function from
    `__main__`, so the test dicts must be globals of the running script.
    '''
    timings = []
    for func in funcs:
        fname = func.__name__
        setup = 'from __main__ import a, b, ' + fname
        cmd = '{0}(a, b)'.format(fname)
        t = Timer(cmd, setup)
        result = t.repeat(reps, loops)
        # Sort so the best (smallest) time of this function comes first
        result.sort()
        timings.append((result, fname))

    # Order the functions by their timing lists, fastest first
    timings.sort()
    for result, fname in timings:
        print('{0:16} {1}'.format(fname, result))

# ----------------------------------------------------------------------

# Check that every implementation agrees on one pair of test dicts
print('Verifying')
size = 1000
a, b, shared_count = make_dicts(size, 0.1)
print('size: {0}, shared count: {1}'.format(size, shared_count))
verify(a, b)

# Timeit tests
reps = 3
fmt = '\nsize: {0}, shared count: {1}, loops: {2}'
for shared in (0.1, 0.25, 0.5, 0.75, 0.9):
    print('\nSHARED: {0:0.2f}'.format(shared))
    #for size in (5, 10, 50, 100, 500, 1000, 5000, 10000, 50000):
    for size in (10, 100, 1000, 10000):
        # `a` and `b` are rebound as module globals because time_test()
        # imports them from __main__
        a, b, shared_count = make_dicts(size, shared)
        # Scale the loop count inversely with the dict size
        loops = 100000 // size
        print(fmt.format(size, shared_count, loops))
        time_test(loops, reps)

输出

^{pr2}$

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值