LintCode 解题记录 17.5.19 (tag: Hash表2)_lintcode 哈希+链表-CSDN博客

本文链接：https://blog.csdn.net/qq_32108329/article/details/72582111

Hash表相关的题目对我来说有点吃力了。很多题目远超规定时间仍然想不出最优解。所以看别人的最优解、然后自己消化吸收就会花不少时间。希望能提高吧。
LintCode Majority Number
给定一个数组，找出其中出现次数大于数组大小一半的数。(输入数据保证只有唯一一个满足要求的解)
直接用Hash统计出现次数，然后判断即可。O(n)的时间与空间复杂度。
问：能否仅以O(1)的空间复杂度实现？时间复杂度仍然为O(n)。
这就要求不用hash表，只用几个int变量遍历数组一次求解。这里用了Greedy的思想，即局部的majority也是整体的majority。那么什么情况下局部的majority是整体的majority呢？直接看代码：

    // Returns the element that occurs more than nums.size() / 2 times,
    // using a single pass and O(1) extra space (majority vote).
    //
    // The result is only meaningful when such an element exists; the vote is
    // not verified afterwards. For an empty vector 0 is returned.
    int majorityNumber(vector<int> nums) {
        int candidate = 0;  // initialised so that empty input is well defined
        int count = 0;      // net votes currently held by `candidate`
        for (int value : nums) {
            if (count == 0) {
                // Everything seen so far cancelled out pairwise: restart the
                // vote with the current element.
                candidate = value;
                count = 1;
            } else if (value == candidate) {
                ++count;
            } else {
                // A different value cancels one vote of the candidate.
                --count;
            }
        }
        return candidate;
    }

从代码中可以看出，用一个count计数器记录candidate的“净”出现次数。遍历到一个数，如果此时这个数不是candidate，count--，否则count++。那么需要注意的就是考虑count==0。count==0代表了什么呢？就是在已经遍历过的数中，candidate占了一半，非candidate占了一半，也就是说这段前缀可以两两配对成不同的数全部抵消掉。由于真正的majority_number在这段前缀中至多占一半，而在整个数组中超过一半，所以它在剩下未遍历的部分中仍然超过一半，即剩下未遍历的数组中的majority_number就是最终的majority_number。所以直接重新开始寻找新的candidate即可。
着重理解这里majority_number > nums.size()/2这个条件的应用。然后可以尝试思考一下majority Number II 这道题。

LintCode Majority Number II
上一题其实可以这么理解:由于majority_number的出现次数>size/2，那么我可以两两丢弃一组不同的数，那么最后剩下的数一定就是答案。丢弃一组不同的数在上述代码的体现就是count--。那么，对于此题(出现次数>size/3)，我们可以每次丢弃三个互不相同的数，那么最后有可能剩下一个数或者两个不同的数，就需要再遍历一遍看看剩下的两个数哪个才是majority_number就好了。

    // Returns the element that occurs more than nums.size() / 3 times.
    //
    // Pass 1 keeps two vote candidates; whenever the current value matches
    // neither and both hold votes, one vote is removed from each (this drops
    // three distinct values at once). Pass 2 recounts both survivors and
    // returns the more frequent one (the second candidate on a tie).
    int majorityNumber(vector<int> nums) {
        int firstCand = 0, firstVotes = 0;
        int secondCand = 0, secondVotes = 0;

        for (int value : nums) {
            if (value == firstCand) {
                ++firstVotes;
            } else if (value == secondCand) {
                ++secondVotes;
            } else if (firstVotes == 0) {
                firstCand = value;
                firstVotes = 1;
            } else if (secondVotes == 0) {
                secondCand = value;
                secondVotes = 1;
            } else {
                // Discard the current value together with one vote of each
                // candidate.
                --firstVotes;
                --secondVotes;
            }
        }

        // The vote totals above are not real frequencies, so count again.
        firstVotes = 0;
        secondVotes = 0;
        for (int value : nums) {
            if (value == firstCand) ++firstVotes;
            if (value == secondCand) ++secondVotes;
        }

        if (firstVotes > secondVotes) {
            return firstCand;
        }
        return secondCand;
    }

LintCode Majority Number III
这一题又是上一题的扩展。majority_number的出现次数 > size/k，那么我每发现k个不同的数，就将其丢弃，最后的majority_number一定就在剩下的数中。至多只可能剩下k-1个不同的数。所以这道题可以以Hash的思想在O(k)的空间复杂度下解决。这种hash思想仍然可以用于前面两题，只是k太小所以直接用一两个变量来代替这个hash。

    // Returns the element that occurs more than nums.size() / k times.
    //
    // At most k-1 candidates are tracked in a hash map. A value that is not a
    // candidate while the map is full removes one vote from every candidate
    // (dropping k distinct values); candidates reaching zero votes are erased.
    // Afterwards the surviving candidates are recounted and the first one to
    // reach the highest real frequency is returned (0 if there is none).
    int majorityNumber(vector<int> nums, int k) {
        unordered_map<int, int> candidates;

        for (int value : nums) {
            auto found = candidates.find(value);
            if (found != candidates.end()) {
                ++found->second;
                continue;
            }
            if (candidates.size() < k - 1) {
                candidates[value] = 1;  // still room: adopt as new candidate
                continue;
            }
            // Map is full: cancel one vote of every candidate. erase() returns
            // the iterator following the removed element, so iteration stays
            // valid while entries are dropped.
            for (auto it = candidates.begin(); it != candidates.end();) {
                if (--it->second == 0) {
                    it = candidates.erase(it);
                } else {
                    ++it;
                }
            }
        }

        // Votes are not frequencies: reset and recount the survivors.
        for (auto &entry : candidates) {
            entry.second = 0;
        }

        int bestCount = 0;
        int answer = 0;
        for (int value : nums) {
            auto slot = candidates.find(value);
            if (slot == candidates.end()) {
                continue;
            }
            const int seen = ++slot->second;
            if (seen > bestCount) {
                bestCount = seen;
                answer = value;
            }
        }
        return answer;
    }

LintCode Anagrams
给定一个字符串数组，返回其中所有的Anagrams的字符串。两个字符串是Anagrams，如果他们打乱字母顺序后可以相等。
马上就想到的思路，建立一个hash:string -> vector(string)，其中key是排序后的字符串，value是原本的字符串容器。那么遍历一遍之后，最后再遍历一遍hash，去掉那些size == 1的vector，把其余所有字符串压到最终的vector中。
代码:

    // Returns every string of `strs` that has at least one anagram partner in
    // `strs`.
    //
    // Strings are grouped by their sorted character sequence; groups with a
    // single member are dropped. Members of a group keep their input order,
    // while the order of the groups follows the hash map's iteration order.
    vector<string> anagrams(vector<string> &strs) {
        unordered_map<string, vector<string>> groups;
        for (const string &word : strs) {
            string key(word);
            sort(key.begin(), key.end());
            groups[key].push_back(word);
        }

        vector<string> result;
        for (const auto &entry : groups) {
            const vector<string> &members = entry.second;
            if (members.size() != 1) {
                result.insert(result.end(), members.begin(), members.end());
            }
        }
        return result;
    }

尝试着来分析复杂度: 假设有n个字符串，每个字符串平均长度为k，那么时间复杂度O(nklogk) (第一个for循环)，第二个for循环的复杂度最多为O(n)。(即为push每一个字符串)。综合起来应该是O(nklogk)。
有一种O(n)的来判断两个字符串是不是Anagrams的方法:（由于本题hash的key是排序后的所以这种方法并不适合本题）
用一个hash统计str1中的字符的出现次数，然后遍历一遍str2，如果当前遍历到的字符的hash值>=1，就代表其出现在str1中，cnt++并且相应的hash值--，否则就肯定不是Anagrams。

LintCode copy List with random pointer
虽然AC了但总感觉怪怪的。先贴我的AC代码:用的是递归。

    // Deep-copies a linked list whose nodes carry an extra `random` pointer.
    //
    // The previous version assigned head->random directly to the copy, so the
    // copied nodes still pointed into the ORIGINAL list. Here every original
    // node is mapped to its copy first, and the random pointers are then
    // redirected through that map so the result is self-contained.
    //
    // Returns the head of the new list, or NULL for an empty list.
    RandomListNode *copyRandomList(RandomListNode *head) {
        if (head == NULL) return NULL;

        unordered_map<RandomListNode*, RandomListNode*> cloneOf;

        // Pass 1: copy labels and next links, remembering original -> copy.
        RandomListNode *copyHead = new RandomListNode(head->label);
        cloneOf[head] = copyHead;
        RandomListNode *tail = copyHead;
        for (RandomListNode *cur = head->next; cur != NULL; cur = cur->next) {
            tail->next = new RandomListNode(cur->label);
            tail = tail->next;
            cloneOf[cur] = tail;
        }
        tail->next = NULL;

        // Pass 2: translate each random pointer into the copied list.
        for (RandomListNode *cur = head; cur != NULL; cur = cur->next) {
            RandomListNode *target = NULL;
            if (cur->random != NULL) {
                target = cloneOf[cur->random];
            }
            cloneOf[cur]->random = target;
        }
        return copyHead;
    }
    // Second attempt: a hash map from each original node to its copy is used
    // to resolve the random pointers.
    unordered_map<RandomListNode*, RandomListNode*> hash;
    // Recursively copies the list starting at `head` and returns the head of
    // the copy (NULL for an empty list). Every created copy is recorded in
    // the member map `hash`.
    RandomListNode *copyRandomList(RandomListNode *head) {
        if (!head) {
            return NULL;
        }
        RandomListNode *clone = new RandomListNode(head->label);
        hash[head] = clone;
        // Recurse before touching `random`, so all following nodes are
        // already registered in `hash` when the lookup below happens.
        clone->next = copyRandomList(head->next);

        RandomListNode *target = NULL;
        if (head->random != NULL) {
            target = hash[head->random];
        }
        clone->random = target;
        return clone;
    }
    // The versions above need O(n) extra space for the map. To get O(1) extra
    // space, each copy is inserted right behind its original node, so that
    // "original -> copy" is simply `original->next`. The random pointers are
    // then filled in, and finally the interleaved list is split back into the
    // original list and the copied list.
    //
    // Returns the head of the copy, or NULL for an empty list. The original
    // list is restored to its initial shape before returning.
    RandomListNode *copyRandomList(RandomListNode *head) {
        // Guard: the splitting step below dereferences the first copy.
        if (head == NULL) return NULL;

        // Step 1: create a copy of every node and splice it in after it.
        for (RandomListNode *cur = head; cur != NULL;) {
            RandomListNode *copy = new RandomListNode(cur->label);
            copy->next = cur->next;
            cur->next = copy;
            cur = copy->next;
        }

        // Step 2: the copy of cur->random sits directly behind cur->random.
        for (RandomListNode *cur = head; cur != NULL; cur = cur->next->next) {
            RandomListNode *copy = cur->next;
            if (cur->random != NULL) {
                copy->random = cur->random->next;
            } else {
                copy->random = NULL;
            }
        }

        // Step 3: unzip the interleaved list, restoring the original links
        // and chaining the copies together.
        RandomListNode *result = head->next;
        for (RandomListNode *cur = head; cur != NULL;) {
            RandomListNode *copy = cur->next;
            cur->next = copy->next;  // back to the next original node
            if (cur->next != NULL) {
                copy->next = cur->next->next;  // copy of the next original
            } else {
                copy->next = NULL;
            }
            cur = cur->next;
        }
        return result;
    }

LintCode Longest Substring Without Repeating Characters
给定一个字符串，求这个字符串没有重复字符的最长子串。
要求O(n)的时间复杂度。
字符串的子串问题，又是要求只能遍历字符串一遍，自然就会想到用两个指针法。
思路：声明一个left指针，与一个right指针，同时声明一个hash表用来判断是否出现过某字符hash:char->position。开始移动right指针，直到遍历到一个已经出现的字符，那么[left, right-1]就是某没有重复字符的子串。然后left就跳到hash[s[right]]+1的地方,然后用新的right去更新hash[s[right]]。注意如果遍历到某hash[s[right]] < left，代表该字符上一次出现的位置在窗口之外，即其并未出现在窗口内。直接拿新的right更新hash[s[right]]即可。

    // Returns the length of the longest substring of `s` that contains no
    // repeated character.
    //
    // Sliding window: `lastSeen` stores the most recent index of every
    // character; when the current character was last seen inside the window,
    // the window start jumps just past that earlier occurrence.
    int lengthOfLongestSubstring(string s) {
        unordered_map<char, int> lastSeen;
        int best = 0;
        int windowStart = 0;
        for (int pos = 0; pos < s.size(); ++pos) {
            const char ch = s[pos];
            auto hit = lastSeen.find(ch);
            // An occurrence left of the window does not count as a repeat.
            if (hit != lastSeen.end() && hit->second >= windowStart) {
                windowStart = hit->second + 1;
            }
            lastSeen[ch] = pos;
            best = max(best, pos - windowStart + 1);
        }
        return best;
    }

LintCode Max Points on a Line
不会题。最后看了网上的思路才AC。
需要考虑几个问题:1.存在重复的点 2.存在斜率为无穷大(即垂直于x轴)的直线
然后二次循环遍历points数组，一般的情况是就是算得两点的斜率为k，那么hash[k]++即可。

    // Returns the largest number of points from `points` lying on one line.
    //
    // For every anchor point the slopes to all other points are counted in a
    // hash map; points identical to the anchor are counted separately and
    // added to every slope bucket. Vertical lines use INT_MAX as slope key.
    int maxPoints(vector<Point>& points) {
        int best = 0;
        unordered_map<double, int> slopeCount;  // slope -> points on that line
        for (int anchor = 0; anchor < points.size(); anchor++) {
            slopeCount.clear();
            // Sentinel bucket so that "all points coincide" still yields a
            // result in the scan below.
            slopeCount[INT_MIN] = 0;
            int same = 1;  // the anchor itself plus its duplicates
            for (int other = 0; other < points.size(); other++) {
                if (other == anchor) continue;
                const bool sameX = points[other].x == points[anchor].x;
                if (sameX && points[other].y == points[anchor].y) {
                    ++same;
                    continue;
                }
                double slope;
                if (sameX) {
                    slope = INT_MAX;  // vertical line
                } else {
                    slope = (double)(points[other].y - points[anchor].y) /
                            (points[other].x - points[anchor].x);
                }
                ++slopeCount[slope];
            }
            // Duplicates of the anchor lie on every line through it.
            for (const auto &entry : slopeCount) {
                const int onLine = entry.second + same;
                if (best < onLine) {
                    best = onLine;
                }
            }
        }
        return best;
    }

这题目估计二刷的时候还是做不出来- -。

LintCode Minimum Window Substring
找出source中包含target的最短子串
滑窗法。用cnt记录窗内还缺少的target字符的个数，起初cnt = target.size()，每在窗内发现一个target中的字符就cnt--
当cnt == 0时，得到一个包含target的子串。然后更新minLen。
然后应该固定right不动，left右移(收缩窗口)，直到cnt != 0时right才能继续向右移动。

    // Returns the shortest substring of `source` that contains every
    // character of `target` (with multiplicity), or "" if there is none.
    // Among windows of equal length the leftmost one is returned.
    //
    // `need[c]` is how many more copies of c the window still requires
    // (negative means surplus); `missing` is the total number of required
    // characters not yet covered by the window.
    string minWindow(string &source, string &target) {
        unordered_map<char, int> need;
        for (char ch : target) {
            ++need[ch];
        }

        int missing = target.size();
        int bestLen = INT_MAX, bestStart = 0, bestEnd = 0;
        int lo = 0;
        for (int idx = 0; idx < source.size(); ++idx) {
            // Take source[idx] into the window; it only helps if it was
            // still required.
            if (--need[source[idx]] >= 0) {
                --missing;
            }
            const int end = idx + 1;  // window is [lo, end)
            // While the window is valid, record it and shrink from the left.
            while (missing == 0) {
                if (end - lo < bestLen) {
                    bestLen = end - lo;
                    bestStart = lo;
                    bestEnd = end;
                }
                if (++need[source[lo]] >= 1) {
                    ++missing;  // a required character just left the window
                }
                ++lo;
            }
        }
        return source.substr(bestStart, bestEnd - bestStart);
    }

实际上left和right都只从左到右扫描了一遍source，所以时间复杂度为O(2*n) = O(n);

LintCode Rehashing
题目不难，但感觉自己对于链表的操作不是很熟，总会陷入卡壳的地方。比如该题中的往链表中插入一个数并返回其头节点方法，不太熟练。

    // Appends a new node holding `val` to the end of the list starting at
    // `root` and returns the head of the resulting list.
    //
    // The earlier version allocated a dummy head with `new` and never freed
    // it, leaking one node per call; the empty-list case is now handled
    // directly instead.
    ListNode* insert(ListNode *root, int val) {
        ListNode *node = new ListNode(val);
        if (root == NULL) {
            return node;  // the new node is the whole list
        }
        ListNode *tail = root;
        while (tail->next != NULL) {
            tail = tail->next;
        }
        tail->next = node;
        return root;
    }
    // Builds a hash table with twice as many buckets as `hashTable` and
    // re-inserts every stored value into it.
    //
    // Values are read bucket by bucket, each chain front to back, and
    // appended to their new bucket in that order. The bucket index is the
    // non-negative remainder of the value modulo the new capacity.
    vector<ListNode*> rehashing(vector<ListNode*> hashTable) {
        // Collect all stored values first.
        vector<int> values;
        for (ListNode *bucket : hashTable) {
            for (ListNode *node = bucket; node != NULL; node = node->next) {
                values.push_back(node->val);
            }
        }

        const int capacity = hashTable.size();
        const int doubled = capacity * 2;
        vector<ListNode*> table(doubled);  // all buckets start out as null
        for (int value : values) {
            // "+ doubled" keeps the index non-negative for negative values.
            const int slot = (value % doubled + doubled) % doubled;
            table[slot] = insert(table[slot], value);
        }
        return table;
    }

LintCode Two Sum - Input array is sorted
这个简单，头尾指针遍历就好了。

    // Finds two elements of the ascending-sorted `nums` whose sum equals
    // `target`, using two indices moving inwards from both ends.
    //
    // Returns the 1-based positions {left, right} of the pair, or an empty
    // vector when no such pair exists (previously control fell off the end
    // of the function in that case, which is undefined behaviour).
    vector<int> twoSum(vector<int> &nums, int target) {
        vector<int> ret;
        int left = 0;
        int right = static_cast<int>(nums.size()) - 1;
        while (left < right) {
            // Widen before adding so that large values cannot overflow int.
            const long long sum = static_cast<long long>(nums[left]) + nums[right];
            if (sum == target) {
                ret.push_back(left + 1);
                ret.push_back(right + 1);
                return ret;
            }
            if (sum > target) {
                --right;  // sum too large: use a smaller right element
            } else {
                ++left;   // sum too small: use a larger left element
            }
        }
        return ret;
    }