#658 Find K Closest Elements
Given a sorted integer array arr, two integers k and x, return the k closest integers to x in the array. The result should also be sorted in ascending order.
An integer a is closer to x than an integer b if:
|a - x| < |b - x|, or
|a - x| == |b - x| and a < b
解题思路(pseudocode):
1.用常规的二分法,找到arr里与x相等或离x最近的整数。
2.以这个数为轴,其 idx ± k 后框出备选范围[idx-k: idx+k+1];edge case,可能idx-k后小于0,或者idx+k+1后超出len(arr)。这两种情况取0或者len(arr)-1。取备选范围的所有数与x的差值,以及其原来的idx信息,新生成一个list of tuples。
3.再把这个list根据差值从小到大sorted,组成一个dict,key是差值,val是list of对应的idx。
4.循环这个dict,直到取够k个idx为止。key是从小到大排列的,所以当目前val内的idx个数小于k时,则全部取完。若大于k,则只取部分。取idx的规则是看val这个list有没有被x断开。如果x在val之外或者是头尾,则从x近侧取值。比如,在左侧则从小到大取,在右侧就从大到小取。如果被断开了,就是从断裂处的左侧从大往小取,取完了断裂处左侧的idx再取右侧的。
# Sample input for the first attempt below.
# NOTE: a `global` statement at module level has no effect; these names are
# module-level either way. extract() below reads `arr` and `x` from module
# scope rather than from its own parameters.
global arr, k, x, left, right, diff_list
arr = [1,3,3,4,5,7,7,8,8,8]  # sorted input array
k = 6  # how many closest elements to return
x = 6  # target value
# find the closest integer: mid is its index
def find_the_closest_integer(arr, x):
    """Binary-search sorted ``arr`` for ``x`` and return the last probed index.

    The returned index is a pivot *near* ``x``, not necessarily the single
    closest element:

    * if ``x`` occurs in ``arr``, the index of one occurrence of ``x``
      (not necessarily the first one);
    * otherwise, either the index of the smallest element greater than ``x``
      or the index of the largest element smaller than ``x``, depending on
      which one the search probed last.

    Args:
        arr: Non-empty list of integers sorted in ascending order.
        x: Target value.

    Returns:
        The pivot index described above.

    Raises:
        ValueError: If ``arr`` is empty (there is no index to return).
    """
    if not arr:
        # Without this guard the loop body never runs and ``mid`` would be
        # unbound when it is printed/returned.
        raise ValueError("arr must contain at least one element")

    left, right = 0, len(arr) - 1
    while left <= right:
        mid = (left + right) // 2
        if arr[mid] == x:
            break
        if arr[mid] > x:  # x lies to the left of mid
            right = mid - 1
        else:  # x lies to the right of mid
            left = mid + 1
    print(mid)  # trace output kept from the original script
    return mid
# narrow down the scope to find k closest integers
# diff_list is a list of tuple with index and diff
def potential_scope(arr, k, x, mid):
    """Collect ``(index, |arr[index] - x|)`` pairs for the candidates around ``mid``.

    The candidate range runs from ``mid - k`` through ``mid + k + 1``
    (inclusive), clipped to the valid indices of ``arr``.

    Args:
        arr: Sorted list of integers.
        k: Number of elements wanted; sets the half-width of the range.
        x: Target value the distances are measured against.
        mid: Pivot index the range is centred on.

    Returns:
        A list of ``(index, distance)`` tuples in ascending index order.
    """
    first = max(mid - k, 0)
    last = min(mid + k + 1, len(arr) - 1)
    return [(pos, abs(arr[pos] - x)) for pos in range(first, last + 1)]
# rearrange the scope in a dict based on diff starting from zero
def pool(diff_list):
    """Group candidate indices by their distance to the target.

    Args:
        diff_list: Iterable of ``(index, distance)`` tuples.

    Returns:
        A dict mapping each distance to the list of indices having that
        distance. Keys are inserted in ascending distance order; within one
        key the indices keep the relative order they had in ``diff_list``
        (``sorted`` is stable).
    """
    ordered = sorted(diff_list, key=lambda pair: pair[1])
    pool_dict = {}
    # One pass over the already-sorted pairs: a distance is first seen (and
    # its key created) in ascending order, so no per-distance rescan is needed.
    for idx, diff in ordered:
        pool_dict.setdefault(diff, []).append(idx)
    print(pool_dict)  # trace output kept from the original script
    return pool_dict
# pick integers from the pool dict
def extract(pool_dict, mid, k):
    """Pick up to ``k`` candidate indices from ``pool_dict``, nearest distance first.

    Args:
        pool_dict: Dict mapping a distance to a list of indices with that
            distance; it is iterated in insertion order.
        mid: Pivot index, used to split a distance group into the part left
            of the pivot and the rest.
        k: Number of indices wanted.

    Returns:
        The list of selected indices (not sorted).

    NOTE(review): ``arr`` and ``x`` are not parameters; they are read from
    the enclosing (module) scope, so this function only agrees with its
    caller when those globals hold the same array and target.
    """
    num = 0 # counter of indices taken from whole groups (not updated by the partial branch)
    selected = [] # store qualified indices
    for key in pool_dict: # val is a list of indices that have same diff
        val = pool_dict[key]
        if len(selected) == k: # stop picking indices from next key
            break
        if num+len(val) <= k: # the whole group fits within the remaining quota
            num += len(val) # pick all of them
            selected += val
            print(selected)
        else: # the group holds more indices than are still needed
            # pick only k - num of them
            if arr[val[0]] > x: # whole group lies to the right of x
                selected += val[:(k-num)] # pick integers from the beginning
            elif arr[val[-1]] < x: # whole group lies to the left of x
                selected += val[-(k-num):] # pick integers from the end
            else: # the group straddles x (or contains values equal to x)
                left_needed_num = 0 # number of indices in the group that are left of mid
                for j in val:
                    if j < mid:
                        left_needed_num += 1
                if k - num < left_needed_num: # fewer indices needed than are available on the left
                    # pick integers starting from the left-hand side of the gap
                    if mid in val:
                        # NOTE(review): val.index(...) raises ValueError if the
                        # computed start index is not a member of val.
                        selected += val[val.index(mid-1 - (k - num) + 1):val.index(mid)]
                    else:
                        # locate the first index in val that is right of mid
                        # NOTE(review): gap_after_idx stays unbound if no
                        # element of val is greater than mid.
                        for i in val:
                            if i > mid:
                                gap_after_idx = val.index(i)
                                break
                        selected += val[gap_after_idx-(k-num):gap_after_idx]
                else: # pick all the k-num integers from the beginning
                    selected += val[:k - num]
            # NOTE(review): the source lost its indentation; this break is
            # placed at the partial-pick level as its comment suggests - confirm.
            break # leave this for loop since selected has been fulfilled
    return selected
def main(arr, k, x):
    """Run the pivot -> scope -> pool -> extract pipeline and print the result.

    The printed value is the slice of ``arr`` spanning from the smallest to
    the largest selected index (inclusive).
    """
    pivot = find_the_closest_integer(arr, x)
    grouped = pool(potential_scope(arr, k, x, pivot))
    chosen = extract(grouped, pivot, k)
    lo, hi = min(chosen), max(chosen)
    print(arr[lo:hi + 1])


if __name__ == "__main__":
    main(arr, k, x)
虽然通过了63个的test,但是took too long.(确实太复杂了,期间各种遇到test跑错,很多edge case。
只能看油管上这题的解法:Find K Closest Elements - Leetcode 658 - Python(https://www.youtube.com/watch?v=o-YDQzHoaKM),这里提到【滑动窗口】的解法。
框出备选范围后,窗口默认置放正中间(即轴在正中),然后不断判断窗口是应该左移右移还是不移。左移的条件是,窗口外的左侧位与x的diff小过窗口内最后一位的diff;右移的条件是窗口外的右侧位与x的diff小过窗口内一号位的diff。移动的幅度是备选范围的一半减去窗口的一半后得到差值的一半。
期间总是遇到走不出判断窗口该不该移该往那边移的循环,后来才想明白了,应该一开始先判断符不符合不移的条件,符合就离开循环,不符合才判断左移还是右移。而不是一上来判断左移还是右移,再把break放在else区。这样就无法走出循环。
遇到第二大类问题是总是因为窗口左右移会撞到list的边界,导致index不存在。只能先判断窗口外的左邻位或右临位在不在list里。但是这样显得特别复杂,只能先跑看能不能被accepted,回头再看看sample solution有没有更好的办法。
第三类折磨了我很久的问题是,找轴总是会跑偏。mid算出来有两种可能,一种就是轴本身,即是closest integer,另外一种是在轴的左边或者右边。这个问题我想了很久,只能用笨办法,也像窗口滑动一样,同时算出mid左右邻位的diff,然后取diff最小的为轴。
class Solution:
    # NOTE(review): `List` is not defined in the visible source; presumably
    # typing.List is supplied by the judge environment - confirm.
    def findClosestElements(self, arr: List[int], k: int, x: int) -> List[int]:
        """Return the k elements of sorted ``arr`` closest to ``x`` (sliding-window attempt).

        Steps: locate a pivot index near ``x``, derive limits around it, place
        a k-wide window around the pivot, then slide the window one position
        at a time until neither neighbour outside the window is preferable.
        """

        def find_mid(arr):
            """Return the index used as the pivot for the window.

            A binary search narrows ``left``/``right``; the resulting index is
            then compared with its immediate neighbours and moved one step to
            a neighbour with a smaller (or, for the left one, equal) distance.
            """
            left, right = 0, len(arr) - 1
            mid, diff = 0, 0
            while right > left:
                mid = (left + right) // 2
                diff = arr[mid] - x
                if diff == 0:
                    break
                elif diff > 0: # x is on the left
                    right = mid
                else: # x is on the right
                    left = mid + 1
            # recompute from the final bounds
            mid = (left + right) // 2
            diff = arr[mid] - x
            if mid - 1 >= 0:
                # a left neighbour exists
                diff_left = abs(arr[mid-1] - x)
                if mid + 1 <= len(arr) - 1:
                    # both neighbours exist; the left one wins ties
                    diff_right = abs(arr[mid + 1] - x)
                    if diff_left <= abs(diff):
                        mid -= 1
                    elif abs(diff) > diff_right:
                        mid += 1
                else:
                    # mid is the last index: only the left neighbour can win
                    if diff_left <= abs(diff):
                        mid -= 1
            else:
                # mid is index 0: only the right neighbour can win
                if mid + 1 <= len(arr) - 1:
                    diff_right = abs(arr[mid + 1] - x)
                    if abs(diff) > diff_right:
                        mid += 1
            return mid

        def potential_scope(arr, k, mid): # scope has 2k-1 elements
            """Return ``(start, end)``: ``mid`` -/+ ``(k - 1)``, clipped to valid indices."""
            width = k - 1
            last = len(arr)-1
            if mid - width >= 0:
                start = mid - width
            else:
                start = 0
            if mid + width <= last:
                end = mid + width
            else:
                end = last
            return start, end

        def window(k, mid, start, end):
            """Return the initial window bounds plus the unchanged ``start``/``end``.

            The window holds k elements and begins at ``mid - k // 2``; when
            that would run past either end of ``arr`` it is pinned to that end.
            """
            if mid - k // 2 < 0:
                # would start before index 0: pin to the left edge
                w_start = 0
                w_end = k-1
            elif mid + k // 2 > len(arr)-1:
                # would extend past the last index: pin to the right edge
                w_start = len(arr) - k
                w_end = len(arr)-1
            else:
                w_start = mid - k // 2
                w_end = w_start + k - 1
            return w_start, w_end, start, end

        def comparison(w_start, w_end, start, end, arr, x):
            """Slide the window by one position per iteration until it settles.

            Returns the slice ``arr[w_start:w_end + 1]`` for the final window.
            Differences are compared signed (``x - left`` vs ``right - x``),
            not as absolute values.
            """
            while True:
                left_val = arr[w_start]
                right_val = arr[w_end]
                # start and end are set as lower and upper limits
                if w_start != start:
                    outside_left_val = arr[w_start - 1]
                    if w_end != end:
                        # neighbours exist on both sides of the window
                        outside_right_val = arr[w_end + 1]
                        # settled: the right neighbour is not closer than the
                        # window's first element, and the left neighbour is
                        # strictly farther than the window's last element
                        if (x - left_val) <= (outside_right_val - x) and (x - outside_left_val) > (right_val - x):
                            break
                        elif (x - left_val) > (outside_right_val - x): # move the window to the right
                            w_start += 1
                            w_end += 1
                        elif (x - outside_left_val) <= (right_val - x): # move the window to the left
                            w_start -= 1
                            w_end -= 1
                    else: # w_end = end, meaning no outside_right_val
                        if (x - outside_left_val) > (right_val - x):
                            break
                        else: # move the window to the left
                            w_start -= 1
                            w_end -= 1
                else: # w_start = start, meaning no outside_left_val
                    if w_end != end:
                        outside_right_val = arr[w_end + 1]
                        if (x - left_val) <= (outside_right_val - x):
                            break
                        else: # move the window to the right
                            w_start += 1
                            w_end += 1
                    else:
                        # window touches both limits: nothing left to try
                        break
            return arr[w_start:w_end + 1]

        mid = find_mid(arr)
        start, end = potential_scope(arr, k, mid)
        w_start, w_end,start, end = window(k, mid, start, end)
        solution = comparison(w_start, w_end, start, end, arr, x)
        return solution
中间有很多重复的语句,但是实在是没办法,为了躲避各种edge case才加上的。
runtime:
跑出来的时候我真的哭了。整整做了4天。不停地遇到各种test不过,然后一点点调整qaq
回头再补上sample solution的学习参考,今天先这样>_<
--------------------- 260ms solution-------------------------------------
有3种思路:
1)不用pointer,省去追踪index的麻烦。也不用window,也不用scope,也不用担心碰到边界超出有效值的范围。太妙了。
这个根本没有用到pointer,也就省去了追踪index的麻烦。直接对现有的arr,按abs(diff)来从小到大排列。然后从新排列好的list里取前k个值。考虑到有些diff实际上是负值,所以对取的值再sorted一次即可。
2)用bisect_left寻找轴,超级快,而且还避免了之前mid找不准closest integer的麻烦。
但最closest的值可能是bisect_left本身,也可能是左邻值。
找到之后,把左右标围绕轴挨着放,然后再把window的size扩至有k个值。扩的标准是左标右标各自移动。如果左标已经顶原arr的左格,则只能右标往右移。如果右标已经在最后一个位,只能左标往左移。其他情况看左标值与目标值的差值与右标值与目标值的差值相比的大小。这个要考虑到有没有出界的办法还是比较麻烦。
这种做法还是太难了。等于把left和right当做取值窗口的外界。所以left才会有个判断条件是-1.考虑到了可能超界的情况。
'''
# Time complexity: O(log(N)+k).
# Space complexity: O(1)
class Solution:
    def findClosestElements(self, arr: List[int], k: int, x: int) -> List[int]:
        """Return the k elements of sorted ``arr`` closest to ``x``, in ascending order.

        ``lo`` and ``hi`` are *exclusive* bounds of the chosen window: the
        window is ``arr[lo + 1:hi]``. It starts empty at the insertion point
        of ``x`` and grows by one element per step towards whichever side is
        closer; ties go to the left (smaller) element.

        When ``len(arr) == k`` the input list itself is returned.
        """
        size = len(arr)
        if size == k:
            return arr

        lo = bisect_left(arr, x) - 1
        hi = lo + 1

        # Each pass widens the window by exactly one element, so k passes
        # yield a window of k elements.
        for _ in range(k):
            if lo == -1:
                # Nothing left on the left side: only the right can grow.
                hi += 1
            elif hi == size or abs(arr[lo] - x) <= abs(arr[hi] - x):
                # Right side exhausted, or the left candidate is at least as close.
                lo -= 1
            else:
                hi += 1

        return arr[lo + 1:hi]
3)左标右标,传统做法,从list两端开始。取中值之后,直接window从mid开始,于mid+k结束。然后看要不要左右滑动。这样不用担心出界的问题。
class Solution:
    def findClosestElements(self, arr: List[int], k: int, x: int) -> List[int]:
        """Return the k elements of sorted ``arr`` closest to ``x``, in ascending order.

        Binary-searches the *start index* of the answer window. Valid starts
        are ``0 .. len(arr) - k``, which keeps ``arr[start + k]`` in range
        whenever it is read.
        """
        lo, hi = 0, len(arr) - k
        while lo < hi:
            start = lo + (hi - lo) // 2
            # Compare the window's first element with the element just past
            # its end, using signed (not absolute) differences.
            if arr[start + k] - x < x - arr[start]:
                # The element past the window is closer: the window must begin later.
                lo = start + 1
            else:
                # The current start is at least as good: keep it as a candidate.
                hi = start
        return arr[lo:lo + k]
而且right的取值有讲究,必须是原arr的长度-k,这样就不会造成mid+k出界。还有要注意的是,这里之所以不以abs(arr[mid]-x) > abs(arr[mid+k]-x)为判别标准,是为了避免发生目标值不在arr[mid:mid+k]的情况。比如[1,2,2,2,2,2,3,3], k=3, x=3.
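left 为0,right为5(len(arr)-k = 8-3),mid为2。第一回合,会出现abs(arr[mid]-x) = abs(arr[mid+k]-x)的情况:arr[2]和arr[5]都是2,abs(2-3) = 1。于是abs的判别式不成立,导致right取值mid,把目标值排除在arr[mid:mid+k]之外了。而用x - arr[mid] > arr[mid+k] - x来判断时是1 > -1,left会右移到mid+1。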
这题非常值得经常复习。再重新写着练习和理解。都经常会发现自己有地方没有理解到。