美团北斗计划-推荐算法研发(大数据与算法部门)
20190805初试(1.5h)
手撕代码:最小的k个数
#利用快排的思想(快速选择),平均O(N)、最坏O(N^2),缺点:修改原有数组
class Solution:
    """Select the k smallest numbers with quickselect.

    Uses quicksort-style partitioning around the first element of the
    current window. The input list is reordered in place while searching.
    """

    def GetLeastNumbers_Solution(self, tinput, k):
        """Return the k smallest values of ``tinput`` in ascending order.

        Args:
            tinput: List of mutually comparable values. It is partially
                reordered in place by the partitioning steps.
            k: How many of the smallest values to return.

        Returns:
            A new list with the k smallest values sorted ascending, or
            ``[]`` when ``tinput`` is None, ``k <= 0`` or ``k > len(tinput)``.
        """
        if tinput is None or k > len(tinput) or k <= 0:
            return []

        target = k - 1
        lo, hi = 0, len(tinput) - 1
        index = self.sort_quick(tinput, lo, hi)
        while index != target:
            # Shrink the window to the side that still contains position
            # k-1; everything outside it is already on the correct side,
            # so it never has to be scanned again.
            if index > target:
                hi = index - 1
            else:
                lo = index + 1
            index = self.sort_quick(tinput, lo, hi)

        # Positions 0..k-1 now hold the k smallest values in arbitrary
        # order; sort that slice to give a deterministic result.
        return sorted(tinput[:k])

    def sort_quick(self, numbers, start, end):
        """Partition ``numbers[start:end + 1]`` in place around its first item.

        After the call every element left of the returned index is <= the
        pivot and every element right of it is >= the pivot.

        Args:
            numbers: Mutable sequence to partition.
            start: First index of the window (inclusive).
            end: Last index of the window (inclusive).

        Returns:
            The final index of the pivot, or None when ``start > end``.
        """
        if start > end:
            return None

        pivot_home = start
        pivot = numbers[pivot_home]
        left, right = start, end
        while left < right:
            # Scan from the right first so that, when the cursors meet, the
            # meeting slot holds a value <= pivot and can be swapped with it.
            while left < right and numbers[right] >= pivot:
                right -= 1
            while left < right and numbers[left] <= pivot:
                left += 1
            numbers[left], numbers[right] = numbers[right], numbers[left]
        numbers[pivot_home], numbers[left] = numbers[left], numbers[pivot_home]
        return left
#维护一个大根堆,O(NlogK),优点:适合海量数据,不修改原有数组
from collections import deque
class Solution:
    """Select the k smallest numbers with a bounded max-heap.

    Heaps are stored 1-indexed in a plain list: slot 0 is an unused
    placeholder so that the children of node ``i`` sit at ``2*i`` and
    ``2*i + 1``. The caller's input is never modified.
    """

    def sort_heap(self, numbers):
        """Return the items of ``numbers`` as a new ascending list (heap sort).

        Args:
            numbers: Iterable of mutually comparable values, or None.

        Returns:
            A new ascending list, or None when ``numbers`` is None.
        """
        if numbers is None:
            return None
        heap = [0]  # slot 0 is the unused placeholder for 1-based indexing
        heap.extend(numbers)
        size = len(heap) - 1
        self._build_max_heap(heap, size)
        self._extract_ascending(heap, size)
        return heap[1:]

    def _build_max_heap(self, heap, size):
        """Arrange ``heap[1..size]`` into max-heap order in place."""
        # Only nodes that have at least one child need sifting; visit them
        # bottom-up so each subtree is already a heap when its root is fixed.
        for node in range(size // 2, 0, -1):
            self.heap_adjust(heap, node, size)

    def _extract_ascending(self, heap, size):
        """Turn the max-heap ``heap[1..size]`` into ascending order in place."""
        for last in range(size, 1, -1):
            # Move the current maximum behind the shrinking heap, then
            # restore heap order on the remaining prefix.
            heap[1], heap[last] = heap[last], heap[1]
            self.heap_adjust(heap, 1, last - 1)

    def heap_adjust(self, numbers, start, end):
        """Sift ``numbers[start]`` down within the 1-indexed heap ``[start, end]``.

        Assumes both subtrees of ``start`` already satisfy max-heap order.

        Args:
            numbers: 1-indexed heap storage (slot 0 unused), modified in place.
            start: Index of the node to sift down.
            end: Last index that belongs to the heap (inclusive).
        """
        parent = start
        child = 2 * parent
        while child <= end:
            # Pick the larger of the two children.
            if child < end and numbers[child] < numbers[child + 1]:
                child += 1
            if not numbers[parent] < numbers[child]:
                break
            numbers[parent], numbers[child] = numbers[child], numbers[parent]
            parent = child
            child = 2 * parent

    def GetLeastNumbers_Solution(self, tinput, k):
        """Return the k smallest values of ``tinput`` in ascending order.

        Keeps a max-heap of the k smallest values seen so far; each later
        value that is smaller than the heap's root replaces it, followed by
        a single sift-down.

        Args:
            tinput: Sequence of mutually comparable values; left unmodified.
            k: How many of the smallest values to return.

        Returns:
            A new ascending list of the k smallest values, or ``[]`` when
            ``tinput`` is None, ``k <= 0`` or ``k > len(tinput)``.
        """
        if tinput is None or k <= 0 or k > len(tinput):
            return []

        heap = [0]  # private 1-indexed copy, so the caller's list is untouched
        heap.extend(tinput[:k])
        self._build_max_heap(heap, k)

        for position in range(k, len(tinput)):
            candidate = tinput[position]
            # heap[1] is the largest of the current k smallest values.
            if heap[1] > candidate:
                heap[1] = candidate
                self.heap_adjust(heap, 1, k)

        self._extract_ascending(heap, k)
        return heap[1:]
面试整理:
1. 项目剖析(其中穿插面试官出的业务场景题)
2. 机器学习相关问题:
xgb与gbdt的区别;xgb的缺点;stacking集成讲解及优势所在
3. NLP相关问题:
word2vec的训练过程
word2vec优点
怎么确定embedding维度