简单说一下,这是上海一家公司给我的两道题,让我先做出来,然后再进行进一步的沟通。但是我已经找到工作了,所以就把这道题发了过去。
Given an array of 10,000 random integers, select the biggest 100 numbers.
1) The order of the result numbers does not matter;
2) Take care about the algorithm performance and big O complexity.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#
#
# By: 张志鹏yi
# Email: qq1126918258@gmail.com
#
# Date: 2014-09-03 01:06:10
import bisect
import sys
import time
from random import randint
"""大致解释写的这么纠结的原因、、只是top,相对来说来时比较简单,定一个长度100的列表tmp_list,遍历一次长列表,每次再于tmp_list对比列表第一位和最后一位,如果是中间值用二分法快速重新对tmp_list
但是想得有点多了,哎,跟题目不相关,而且性能不好,算了,还是用二分法写一次吧-. - method --》 top2
top1 1.先把列表根据用户要取top值对列表进行分片,用冒泡排序反转。2.将分片排序后的列表按照列表第一个值从小到大排序,从第一个列表开始,与下一个列表取top值。
top2 1.完全使用二分法迭代列表.
top1 优点:分片处理, 分片排序后取top值快. 缺点:冒泡排序效率过低...
top2 优点:快, 可以对每次排序后的值只取top100,减少内存和下一轮多余的排序.
缺点:排序速度跟列表长度成正比. 50000 500000 5000000 50000000 时间几乎成正比 x10。
待优化:看网上说腾讯算法题目,一亿个数字取top100,有大神只用了3秒就出结果了...
我的想法,可以使用多线程,每50000个值进行一次排序.返回一个列表集. 然后再用top1中的步骤二进行快速取top值。
"""
class GetTop(object):
    """Select the largest ``topnum`` values from a list of random integers.

    Original exercise: given an array of 10,000 random integers, select the
    biggest 100 numbers.

    1) The order of the result numbers does not matter;
    2) Take care about the algorithm performance and big O complexity.

    Two strategies are implemented:

    * ``top1`` -- cut the list into chunks, bubble-sort every chunk, then use
      the chunk heads to discard values that cannot be in the result.
    * ``top2`` -- keep a short sorted list of the best values seen so far and
      place each new value into it with a binary search.
    """

    def __init__(self, num=10000, maxnum=100000000, topnum=5):
        """Store the run parameters.

        num: length of the list built by ``makelist``.
        maxnum: largest value (inclusive) that ``makelist`` may generate.
        topnum: how many of the largest values to select.
        """
        self.lnum = []  # input list, filled by makelist()
        self.num = num
        self.tnum = topnum
        self.mnum = maxnum
        self.nums = []  # result of the most recent top1() call

    def makelist(self):
        """Fill ``self.lnum`` with ``self.num`` random integers and return it.

        Values come from ``randint(0, self.mnum)``, so both bounds are
        inclusive.
        """
        try:
            lazy_range = xrange  # Python 2: do not materialise the index list
        except NameError:
            lazy_range = range  # Python 3: range is already lazy
        upper = self.mnum
        self.lnum = [randint(0, upper) for _ in lazy_range(self.num)]
        return self.lnum

    def sort(self):
        """Split ``self.lnum`` into chunks and sort each chunk descending.

        The chunk length is ``len(self.lnum) // self.tnum`` but never less
        than 1, so a list shorter than ``self.tnum`` no longer causes a
        division by zero. Every chunk is a copy; ``self.lnum`` is untouched.

        Returns a ``(chunks, finished_at)`` tuple where ``chunks`` is the list
        of sorted chunks and ``finished_at`` is the ``time.time()`` value taken
        right after sorting. Prints the chunk length and the elapsed time.
        """
        starttime = time.time()
        data = self.lnum
        chunk_len = max(1, len(data) // max(self.tnum, 1))
        print("sort len: %s" % chunk_len)
        chunks = [
            self.bubble(data[start:start + chunk_len])
            for start in range(0, len(data), chunk_len)
        ]
        endtime = time.time()
        # No newline on purpose: top1() continues the report on this line.
        sys.stdout.write('sorted time: %.5f; ' % (endtime - starttime))
        return chunks, endtime

    def top1(self):
        """Select the largest ``self.tnum`` values via chunked sorting.

        Steps:

        1. ``sort()`` cuts the list into chunks sorted in descending order.
        2. When there are at least ``self.tnum`` chunks, the ``self.tnum``-th
           largest chunk head is a lower bound for the result: at least
           ``self.tnum`` values are greater than or equal to it, so every
           smaller value can be dropped without looking at it again.
        3. The surviving candidates are sorted and the best ``self.tnum`` are
           stored in ``self.nums`` (descending order).

        Returns None; the result is left in ``self.nums``. Prints the total
        time and the time spent after the chunk sort.
        """
        starttime = time.time()
        chunks, newstime = self.sort()
        want = max(self.tnum, 0)

        heads = sorted((chunk[0] for chunk in chunks), reverse=True)
        candidates = []
        if want and len(heads) >= want:
            threshold = heads[want - 1]
            for chunk in chunks:
                # Chunks are descending, so stop at the first small value.
                for value in chunk:
                    if value < threshold:
                        break
                    candidates.append(value)
        else:
            # Too few chunks for a bound: each chunk can contribute at most
            # its own first ``want`` values.
            for chunk in chunks:
                candidates.extend(chunk[:want])

        candidates.sort(reverse=True)
        self.nums = candidates[:want]
        endtime = time.time()
        print('all time: %.5f; top time:%.5f'
              % ((endtime - starttime), (endtime - newstime)))

    def top2(self, lnum=None):
        """Return the largest ``self.tnum`` values in ascending order.

        lnum: list to scan; when empty or omitted ``self.lnum`` is used.

        Each value is offered once to ``_top2``, which maintains a sorted
        list of at most ``self.tnum`` elements. An empty input yields ``[]``.

        Timings recorded by the original author for the earlier
        implementation (top 100): about 0.04 s for 10,000 values, 1.9 s for
        500,000, 19 s for 5,000,000 and 189 s for 50,000,000.
        """
        source = lnum if lnum else self.lnum
        best = []
        for value in source:
            best = self._top2(best, value)
        return best

    def _top2(self, lnum=None, num=None):
        """Offer ``num`` to the ascending list ``lnum`` of best values.

        lnum: ascending-sorted list of the values kept so far; it is modified
            in place. ``None`` is treated as an empty list.
        num: the candidate value.

        Returns the list, still ascending and holding at most ``self.tnum``
        elements. A candidate that is not larger than the current minimum of
        a full list is ignored.
        """
        kept = [] if lnum is None else lnum
        limit = self.tnum
        if limit <= 0:
            return []
        if len(kept) >= limit and num <= kept[0]:
            return kept
        bisect.insort(kept, num)
        overflow = len(kept) - limit
        if overflow > 0:
            del kept[:overflow]  # drop the smallest values
        return kept

    def insert(self, lnum, index, num):
        """Insert ``num`` into ``lnum`` at ``index`` in place; return ``lnum``."""
        lnum.insert(index, num)
        return lnum

    def bubble(self, List):
        """Bubble-sort ``List`` in place into descending order and return it.

        The list is sorted ascending and then reversed. A pass that performs
        no swap ends the sort early.

        Bubble sort is quadratic in the chunk length. The original author
        measured about 0.097 s for a 10,000-element list cut into chunks of
        100 and about 2.1 s for a 50,000-element list cut into chunks of 500
        (earlier implementation, without the early exit).
        """
        for last in range(len(List) - 1, 0, -1):
            swapped = False
            for i in range(last):
                if List[i] > List[i + 1]:
                    List[i], List[i + 1] = List[i + 1], List[i]
                    swapped = True
            if not swapped:
                break
        List.reverse()
        return List
def test(topnum=200, num=200000, maxnum=10000):
    """Benchmark ``GetTop.top1`` on a freshly generated random list.

    topnum: how many of the largest values to select.
    num: length of the random list.
    maxnum: upper bound of the random values, passed to ``GetTop``.

    Prints a header, lets ``top1`` print its own timing report, then prints a
    blank line. Written so that it runs on both Python 2 and Python 3.
    """
    # No newline: the report printed by top1() continues on the same line.
    # The two trailing spaces reproduce what the Python 2 statement
    # ``print "...; ",`` emitted before the next print.
    sys.stdout.write(
        "top1 num: " + str(topnum) + "; list len: " + str(num) + ";  ")
    picker = GetTop(topnum=topnum, num=num, maxnum=maxnum)
    picker.makelist()
    picker.top1()
    print("")
def test1(topnum=200, num=200000, maxnum=10000):
    """Benchmark ``GetTop.top2`` on a freshly generated random list.

    topnum: how many of the largest values to select.
    num: length of the random list.
    maxnum: upper bound of the random values, passed to ``GetTop``.

    Only the ``top2`` call is timed; building the list is excluded. Prints a
    header followed by the elapsed time on one line. Written so that it runs
    on both Python 2 and Python 3.
    """
    # No newline: the timing below completes this line. The two trailing
    # spaces reproduce what the Python 2 statement ``print "...; ",`` emitted
    # before the next print.
    sys.stdout.write(
        "top2 num: " + str(topnum) + "; list len: " + str(num) + ";  ")
    picker = GetTop(topnum=topnum, num=num, maxnum=maxnum)
    picker.makelist()
    starttime = time.time()
    picker.top2()
    endtime = time.time()
    print('sort list time: %.5f ' % (endtime - starttime))
def t1():
    """Run the ``test`` benchmark over a grid of list lengths and top sizes.

    For each list length (10000, 20000, 50000) the top sizes 100, 200 and 500
    are benchmarked in that order, always with ``maxnum=10000``.
    """
    for length in (10000, 20000, 50000):
        for top in (100, 200, 500):
            test(topnum=top, num=length, maxnum=10000)
def t2():
    """Run the ``test1`` benchmark over a grid plus three large lists.

    First the list lengths 10000, 20000 and 50000 are each benchmarked with
    top sizes 100, 200 and 500; then top size 100 is benchmarked on lists of
    500000, 5000000 and 50000000 values. ``maxnum`` is always 10000.
    """
    for length in (10000, 20000, 50000):
        for top in (100, 200, 500):
            test1(topnum=top, num=length, maxnum=10000)
    for length in (500000, 5000000, 50000000):
        test1(topnum=100, num=length, maxnum=10000)
if __name__ == '__main__':
    # Script entry point: run the top2 benchmark suite.
    # Call t1() here instead to benchmark top1.
    t2()