原理
- 集合为什么能够如此高效,特别是查找、插入和删除操作?
- 这当然和集合内部的数据结构密不可分。
- 不同于列表等其他数据结构,字典和集合的内部结构都是一张哈希表。
- 对集合来说,区别就是哈希表中没有键和值的配对,每个位置只存放单一的元素。
实例1
# -*- coding: utf-8 -*-
import random
import time
# Number of membership probes run by the benchmark, and the upper bound
# of the random values stored in setB.
num = 100000

# num - 1 random integers; the k-th value is drawn from [1, k], so the list
# contains many duplicates.
listA = [random.randint(1, i) for i in range(1, num)]

# A set holding exactly as many distinct values as listA has entries, all in
# [1, num]. random.sample draws them without replacement in a single pass,
# instead of retrying random.randint until enough distinct values turn up.
setB = set(random.sample(range(1, num + 1), len(listA)))

# listA with its duplicates collapsed, so it can never be larger than setB.
setA = set(listA)
def _time_membership(container, count):
    """Return the seconds spent testing ``i in container`` for i in range(count).

    Args:
        container: Any object supporting the ``in`` operator.
        count: Number of consecutive integers, starting at 0, to probe.

    Returns:
        Elapsed wall-clock time in seconds, as measured by ``time.time()``.
    """
    started = time.time()
    for i in range(count):
        if i in container:
            pass
    return time.time() - started


if __name__ == '__main__':
    # range() and single-argument print(...) run on both Python 2 and 3,
    # unlike the xrange / print-statement forms.

    # list
    print(_time_membership(listA, num))

    # set 1: built from listA
    print(_time_membership(setA, num))

    # set 2: same number of elements as listA
    print(_time_membership(setB, num))
146.096999884
0.018000125885
0.0239999294281
实例2
# -*-encoding:utf8 -*-
# list version
def find_unique_price_by_list(products):
    """Count the distinct prices in *products* using a plain list.

    This is the deliberately slow baseline of the comparison: every
    ``not in`` test scans the list collected so far, so the total work grows
    quadratically with the number of distinct prices.

    Args:
        products: Iterable of ``(product_id, price)`` pairs.

    Returns:
        The number of distinct prices.
    """
    unique_price_list = []
    for _, price in products:
        if price not in unique_price_list:
            unique_price_list.append(price)
    return len(unique_price_list)
# set version
def find_unique_price_by_set(products):
    """Count the distinct prices in *products* using a set.

    Duplicates are discarded by the set itself, so no explicit membership
    test is needed.

    Args:
        products: Iterable of ``(product_id, price)`` pairs; prices must be
            hashable.

    Returns:
        The number of distinct prices.
    """
    return len({price for _, price in products})
if __name__ == '__main__':
    import time

    # 100000 products, each with its own id and its own price.
    ids = list(range(0, 100000))
    price = list(range(200000, 300000))
    product_objs = list(zip(ids, price))

    # Time the list-based version.
    start_using_list = time.perf_counter()
    find_unique_price_by_list(product_objs)
    end_using_list = time.perf_counter()
    print("time elapse using list: {}".format(end_using_list - start_using_list))

    # Time the set-based version.
    start_using_set = time.perf_counter()
    find_unique_price_by_set(product_objs)
    end_using_set = time.perf_counter()
    print("time elapse using set: {}".format(end_using_set - start_using_set))