LintCode 612: K Closest Points

纸上得来终觉浅绝知此事要躬行

已于 2022-12-03 16:09:55 修改

阅读量366

点赞数

分类专栏： algorithm-design 文章标签： LintCode

于 2018-08-31 13:09:12 首次发布

本文链接：https://blog.csdn.net/roufoo/article/details/82254848

版权

algorithm-design 专栏收录该内容

817 篇文章 4 订阅

订阅专栏

K Closest Points

Given some points and an origin point in two-dimensional space, find the k points which are nearest to the origin.
Return these points sorted by distance; if two points are at the same distance, sort them by x, and if their x values are also equal, sort them by y.

Example
Example 1:

Input: points = [[4,6],[4,7],[4,4],[2,5],[1,1]], origin = [0, 0], k = 3
Output: [[1,1],[2,5],[4,4]]
Example 2:

Input: points = [[0,0],[0,9]], origin = [3, 1], k = 1
Output: [[0,0]]

解法1：
我用的数据结构是map< long long, multiset < Point > >，利用了map和set内部元素自动按key排序的特性。
注意：
1）这里同样的distance可能有多个点，都放在multiset里面。
2）另外，同一个点可能出现多次，都要输出而不是只输出一个。
3）distance不用开根号，因为反正是比较大小。开了根号反而不能用dist做key了，因为dist是double型。

/**
 * Definition for a point.
 * struct Point {
 *     int x;
 *     int y;
 *     Point() : x(0), y(0) {}
 *     Point(int a, int b) : x(a), y(b) {}
 * };
 */

// Orders points by x first and falls back to y when the x coordinates tie.
bool operator < (const Point &a, const Point &b) {
    return (a.x != b.x) ? (a.x < b.x) : (a.y < b.y);
}

/**
 * Squared Euclidean distance between two points.
 *
 * The square root is skipped on purpose: comparing squared distances gives the
 * same ordering, and the result stays an exact integer.
 * The coordinate differences are widened to long long BEFORE multiplying, so
 * the arithmetic is done in 64 bits instead of overflowing in int.
 *
 * @param a: first point
 * @param b: second point
 * @return: (a.x - b.x)^2 + (a.y - b.y)^2
 */
long long distance(const Point &a, const Point &b) {
    const long long dx = static_cast<long long>(a.x) - b.x;
    const long long dy = static_cast<long long>(a.y) - b.y;
    return dx * dx + dy * dy;
}

class Solution {
public:
    /**
     * @param points: a list of points
     * @param origin: a point
     * @param k: An integer
     * @return: the k closest points
     */
    vector<Point> kClosest(vector<Point> &points, Point &origin, int k) {
        vector<Point> result;
        for (auto p : points) {
            long long dist = distance(p, origin);
            mp[dist].insert(p);
        }
        
        int count = 0;
        for (auto m : mp) {
           for (auto n : m.second) {
                if (count == k) break;
                result.push_back(n);
                count++;
            }
        }
        return result;
    }

private:
    map<long long, multiset<Point>> mp;
};

解法2：
用最大堆。 priority_queue。

/**
 * Definition for a point.
 * struct Point {
 *     int x;
 *     int y;
 *     Point() : x(0), y(0) {}
 *     Point(int a, int b) : x(a), y(b) {}
 * };
 */
// Reference point the heap comparator measures distances from; kClosest assigns it before building the heap.
Point g_origin;

/**
 * Squared Euclidean distance between two points.
 *
 * The coordinate differences are widened to long long BEFORE multiplying, so
 * the arithmetic is done in 64 bits instead of overflowing in int.
 *
 * @param a: first point
 * @param b: second point
 * @return: (a.x - b.x)^2 + (a.y - b.y)^2
 */
long long distance(const Point &a, const Point &b) {
    const long long dx = static_cast<long long>(a.x) - b.x;
    const long long dy = static_cast<long long>(a.y) - b.y;
    return dx * dx + dy * dy;
}
// Comparator for the heap: orders points by their distance() to g_origin,
// then by x, then by y.
class compare {
public:

  bool operator() (const Point &a, const Point &b) const {
      const long long lhs = distance(a, g_origin);
      const long long rhs = distance(b, g_origin);
      if (lhs != rhs) return lhs < rhs;
      if (a.x != b.x) return a.x < b.x;
      return a.y < b.y;
  }
};

class Solution {
public:
    /**
     * @param points: a list of points
     * @param origin: a point
     * @param k: An integer
     * @return: the k closest points
     */
    vector<Point> kClosest(vector<Point> &points, Point &origin, int k) {
        g_origin = Point(origin);       
        priority_queue<Point, vector<Point>, compare> pq;  //最大堆
        int count = points.size();
        for (int i = 0; i < count; ++i) {
            pq.push(points[i]);
            if (pq.size() > k) pq.pop();
        }
        
        vector<Point> result;
        while(!pq.empty()) {
            result.push_back(pq.top());
            pq.pop();
        }
        
        reverse(result.begin(), result.end());
        return result;
    }
};

注意:
1)求n个数中的最小k个数用最大堆。求n个数中的最大k个数用最小堆。
为什么求最小K个数是用最大堆呢？最开始k个数构成一个最大堆。第k+1个数push进去后，如果比top大，就会替代top，这时堆的元素总个数为（k+1)，然后紧接着又把top给pop出来，这样堆里面就是最开始k+1个数里面最小的k个数了。我们按同样的步骤走完n个元素后，堆里面剩下的就是n个元素里面最小的k个数。
2） C++的priority_queue默认是最大堆（只写1个模板参数就可以了）。如果要用最小堆，必须写全3个模板参数，如下:
priority_queue < int, vector < int >, greater < int > > pq;
不过这种写法不能直接用于自定义的数据结构：less/greater内部调用的是该类型的operator <或operator >，自定义类型必须自己提供。如果要把自定义的数据结构用于最大堆或最小堆，有两个方法：
方法1：重载operator <。
比如说最大堆，

// Less-than for Point: compares distance() to g_origin first, then x, then y.
bool operator < (const Point &a, const Point &b) {
      const long long lhs = distance(a, g_origin);
      const long long rhs = distance(b, g_origin);
      if (lhs != rhs) return lhs < rhs;
      return (a.x != b.x) ? (a.x < b.x) : (a.y < b.y);
}

定义priority_queue如下即可：
priority_queue < Point > pq;  // the default comparator less<Point> calls the overloaded operator<, so top() is the largest Point under that ordering

如果是最小堆，则如下定义：

// Heap entry carrying a value and an index. Ordering is provided through
// operator> so the type can be used with greater<Node>.
struct Node {
    int value;
    int idx;

    Node(int v, int i) : value(v), idx(i) {}

    // Only the greater-than comparison is declared (a less-than friend was left disabled).
    friend bool operator>(const Node &lhs, const Node &rhs);
};

// A node is "greater" when its value is larger; idx takes no part in the ordering.
inline bool operator>(const Node &lhs, const Node &rhs) {
    return rhs.value < lhs.value;
}
priority_queue<Node, vector<Node>, greater<Node> > pq; // greater<Node> calls operator> to order the Nodes, which makes pq a min-heap

方法2：定义compare类。

// Comparator class: orders points by distance() to g_origin, then x, then y.
class compare {
public:
  bool operator() (const Point &a, const Point &b) const {
      const long long da = distance(a, g_origin);
      const long long db = distance(b, g_origin);
      // Note: "<" here yields a max-heap; use ">" for a min-heap.
      if (da != db) return da < db;
      return (a.x == b.x) ? (a.y < b.y) : (a.x < b.x);
  }
};

注意，定义了compare类后，不管是最大堆还是最小堆，定义priority_queue都是如下：
priority_queue< Point, vector < Point >, compare > pq;

3)时间复杂度为O(nlogk)。

3刷
代码如下：

/**
 * Definition for a point.
 * struct Point {
 *     int x;
 *     int y;
 *     Point() : x(0), y(0) {}
 *     Point(int a, int b) : x(a), y(b) {}
 * };
 */
 
// Reference point used by operator< below; kClosest assigns it before any comparison is made.
Point originPoint;

/**
 * Squared Euclidean distance between a and b.
 *
 * Returns long long and widens the coordinate differences before multiplying,
 * so the arithmetic is done in 64 bits instead of overflowing in int.
 */
long long distance (const Point & a, const Point & b) {
    const long long dx = static_cast<long long>(a.x) - b.x;
    const long long dy = static_cast<long long>(a.y) - b.y;
    return dx * dx + dy * dy;
}

/**
 * Orders points by squared distance to originPoint, then by x, then by y.
 * The distances are held in long long so they are not truncated to int.
 */
bool operator < (const Point & a, const Point & b) {
    const long long dist1 = distance(a, originPoint);
    const long long dist2 = distance(b, originPoint);
    if (dist1 != dist2) return dist1 < dist2;
    if (a.x != b.x) return a.x < b.x;
    return a.y < b.y;
}

class Solution {
public:
    /**
     * @param points: a list of points
     * @param origin: a point
     * @param k: An integer
     * @return: the k closest points
     */
    vector<Point> kClosest(vector<Point> &points, Point &origin, int k) {
        int n = points.size();
        originPoint = origin;
        sort(points.begin(), points.end());
        
        priority_queue<Point> pq;
        for (int i = 0; i < n; ++i) {
            pq.push(points[i]);
            if (i >= k) {
                if (points[i] < pq.top()) pq.push(points[i]);
                pq.pop();
            }
        }
        vector<Point> results;
        while(!pq.empty()) {
            results.push_back(pq.top());
            pq.pop();
        }
        sort(results.begin(), results.end());
        return results;
    }
};

代码同步在
https://github.com/luqian2017/Algorithm

4 刷: 跟上面的原理差不多，但是有些优化。

// Squared distance of (x, y) from (0, 0).
// Each argument is parenthesized so expressions such as dist(a + 1, b) expand
// correctly, and the 1LL factor promotes int operands to long long so the
// squares are not computed in int. Each argument is evaluated twice.
#define dist(x, y) (1LL * (x) * (x) + 1LL * (y) * (y))

// A point that orders by its squared distance from (0, 0), for use in a max-heap.
struct Node {
    int _x;
    int _y;
    Node(int x, int y) : _x(x), _y(y) {}
    bool operator < (const Node & node) const {
        return dist(_x, _y) < dist(node._x, node._y);
    }
};

class Solution {
public:
    /**
     * Returns the k points closest to (0, 0).
     *
     * A max-heap holds at most k points; a new point is admitted only when it
     * is strictly closer than the farthest point currently kept, which avoids
     * pushing an element that would be popped straight away.
     *
     * @param points: list of [x, y] pairs
     * @param k: number of points wanted
     * @return: the selected points, sorted lexicographically by (x, y); fewer
     *          than k if points is shorter, empty when k <= 0
     */
    std::vector<std::vector<int>> kClosest(std::vector<std::vector<int>>& points, int k) {
        std::vector<std::vector<int>> res;
        // Nothing requested. This also keeps top() from being called on an empty heap.
        if (k <= 0) return res;

        std::priority_queue<Node> maxHeap;
        for (const auto& p : points) {
            const Node candidate(p[0], p[1]);
            if (static_cast<int>(maxHeap.size()) < k) {
                maxHeap.push(candidate);
            } else if (candidate < maxHeap.top()) {
                // Strictly closer than the farthest kept point: replace it.
                maxHeap.pop();
                maxHeap.push(candidate);
            }
        }

        // Append and sort once at the end; inserting each popped point at the
        // front of res instead was found to be too slow (it timed out).
        while (!maxHeap.empty()) {
            const Node& farthest = maxHeap.top();
            res.push_back({farthest._x, farthest._y});
            maxHeap.pop();
        }
        std::sort(res.begin(), res.end());
        return res;
    }
};