[H哈希] lc1948. 删除系统中的重复文件夹(字符串哈希+树的哈希+trie树上哈希+难题+周赛251_4)

最新推荐文章于 2023-12-31 16:20:59 发布

Ypuyu

最新推荐文章于 2023-12-31 16:20:59 发布

阅读量143

点赞数

分类专栏： LeetCode

本文链接：https://blog.csdn.net/yl_puyu/article/details/119081068

版权

LeetCode 同时被 2 个专栏收录

318 篇文章 1 订阅

订阅专栏

疑问

13 篇文章 0 订阅

订阅专栏

文章目录

- 1. 题目来源
- 2. 题目解析

1. 题目来源

链接：1948. 删除系统中的重复文件夹

2. 题目解析

怎么说呢，当我做完前三道，发现最后一道题还没人 Ak 的时候，我就意识到这题不属于我了…

首先，我对树相关的知识掌握得不好。本题的做法是：用 trie 树表示目录结构，再用字符串哈希对 trie 的每棵子树计算哈希值，以此查找结构相同的子树。

主要思路如下（借鉴坑神思路）：

建立字典树表示文件夹结构。
对字典树每个子树进行哈希，统计相同子树出现的次数。
删除重复的子树，输出答案即可。

另外，坑神的字典树节点设计也很讲究：每个节点保存双哈希值和删除标记，并用 map<string, int> 把子文件夹名映射到子节点编号，作为指向子节点的链接。

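时间复杂度： $O(S \log n + M)$，其中 $S$ 为所有文件夹名的总长度，$n$ 为字典树节点数，$M$ 为素数筛的上界（代码中为 $3 \times 10^5$）；$\log n$ 来自 map 的查找。
空间复杂度： $O(S + M)$，字典树保存所有文件夹名，另加素数表。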

代码：

我的代码。

坑神写了 1 小时…可啪…

// Shorthands used by the rest of the file: a 64-bit unsigned integer, a pair of
// them (the two components of a double hash), and make_pair.
#define ULL unsigned long long
#define PLL pair<ULL, ULL> 
#define MP(x, y) make_pair(x, y)

// Hashing parameters.
// MA / MB: moduli applied to the two subtree hash components.
const ULL MA = 1000000007ULL;
const ULL MB = 1000000009ULL;
// MV: polynomial base for folder-name hashes; MVA / MVB: the moduli applied to them.
const ULL MV = 100007ULL;
const ULL MVA = 10000007ULL;
const ULL MVB = 10000009ULL;
// Occurrence count per (hashA, hashB) subtree signature.
map<PLL, int> tCount;

// Capacity of the trie node pool.
const int MAXN = 200000;
// Capacity of the sieve tables.
const int MAXM = 300000;

// One trie node, i.e. one folder of the file system.
struct Node {
    map<string, int> link;  // child folder name -> index of that child in `node`
    int siz;                // number of nodes in the subtree rooted here (written by dfs)
    bool del;               // set by dfs when this folder must be dropped from the answer
    ULL hashA;              // first hash component of this node (own name + children)
    ULL hashB;              // second hash component of this node
};

// Static node pool; a node is referred to by its index in this array.
Node node[MAXN];
ULL prime[MAXM];      // prime[k] is the k-th prime (0-based), filled by get_prime
bool notPrime[MAXM];  // sieve marks: true once an index is known to be composite
int numn;             // index of the most recently allocated trie node
int root;             // index of the trie root
int totPrime;         // number of valid entries in prime[]

// Resets trie node x to an empty, non-deleted folder with subtree size 0.
// The hash fields are left as they are.
void initNode(int x){
    auto& fresh = node[x];
    fresh.del = false;
    fresh.siz = 0;
    fresh.link.clear();
}

// Inserts one folder path into the trie.
//   x    : index of the node to start from (the caller passes the root)
//   path : folder names from the top level down
// Every component that is missing under the current node gets a freshly
// allocated node (next index after numn) linked under that name.
void insertPath(int x, vector<string>& path){
    for (const string& name : path){
        auto& children = node[x].link;
        const auto found = children.find(name);
        if (found == children.end()){
            // Allocate and reset the new node before linking it in.
            const int created = ++numn;
            initNode(created);
            children.emplace(name, created);
            x = created;
            continue;
        }
        x = found->second;
    }
}

// Post-order walk over the trie below node x. It is meant to be run twice:
//   flag == true  : counting pass. Fills node[x].siz and node[x].hashA/hashB and
//                   increments tCount for the signature of every non-empty folder.
//   flag == false : marking pass. Rebuilds the same signature from the child
//                   hashes stored by the counting pass and sets node[x].del
//                   when tCount reports that signature more than once.
// strA / strB are the two hashes of x's own folder name (the caller passes 0
// for the root). They are mixed into node[x].hashA/hashB but NOT into the
// signature, so two folders with different names and equal child sets share
// one signature.
void dfs(int x, ULL strA, ULL strB, bool flag){
    auto& cur = node[x];
    if (flag) cur.siz = 1;

    // Recurse into every child, handing it the double hash of its name.
    for (auto& child : cur.link){
        ULL sA = 0, sB = 0;
        for (char c : child.first){
            ULL v = c;
            sA = (sA * MV + v) % MVB;
            sB = (sB * MV + v) % MVA;
        }
        dfs(child.second, sA, sB, flag);
        if (flag) cur.siz += node[child.second].siz;  // accumulate subtree size
    }

    // Signature of the child set: sum of the child hashes, each weighted by the
    // prime selected by that child's subtree size.
    // NOTE(review): prime[] only holds totPrime entries; a subtree size at or
    // beyond that reads an unset slot -- confirm the input bounds keep sizes below it.
    ULL hashA = 0, hashB = 0;
    for (auto& child : cur.link){
        const auto& sub = node[child.second];
        hashA = (hashA + sub.hashA * prime[sub.siz]) % MA;
        hashB = (hashB + sub.hashB * prime[sub.siz]) % MB;
    }

    // Emptiness is decided from the child map itself. Testing the signature for
    // zero instead would also skip a non-empty folder whose signature happens
    // to be 0 in either component, leaving a duplicate undeleted.
    const bool hasChildren = !cur.link.empty();

    if (flag){
        // Only non-empty folders are counted, so empty folders can never make
        // a signature look duplicated.
        if (hasChildren) tCount[MP(hashA, hashB)] += 1;
        cur.hashA = (hashA + strA * prime[0]) % MA;
        cur.hashB = (hashB + strB * prime[0]) % MB;
    }else if (hasChildren){
        if (tCount[MP(hashA, hashB)] > 1) cur.del = true;
    }
}

// Collects the paths of all folders that survive deletion.
//   x    : current trie node
//   ans  : output list; receives one entry per surviving non-root folder
//   path : names from the root down to x (empty at the root); restored on return
// A node flagged `del` is skipped together with its whole subtree.
void dfsFA(int x, vector<vector<string>>& ans, vector<string>& path){
    if (!node[x].del){
        // The root has an empty path and is not part of the answer.
        if (!path.empty()) ans.push_back(path);

        for (auto& child : node[x].link){
            path.push_back(child.first);
            dfsFA(child.second, ans, path);
            path.pop_back();
        }
    }
}

// 线性筛
void get_prime(int MAX) {
    totPrime = 0;
    memset(notPrime, false, sizeof(notPrime));
    for (int i = 2; i < MAX; i++) {
        if (!notPrime[i]) prime[totPrime++] = i;
        if (totPrime > MAXN) continue;
        for (int j = 0; j < MAX; j++) {
            if (i * prime[j] >= MAX) break;
            notPrime[i * prime[j]] = true;
            if (i % prime[j] == 0) break;
        }
    }
}

class Solution {
public:
    // Builds a trie from `paths`, marks every folder whose set of subfolders
    // occurs more than once, and returns the paths of the folders that remain.
    vector<vector<string>> deleteDuplicateFolder(vector<vector<string>>& paths) {
        // Reset the global state left over from a previous call.
        tCount.clear();
        numn = 1;
        root = 1;
        initNode(root);
        initNode(0);

        // Fill the prime table used to weight child hashes.
        get_prime(MAXM);

        // Insert every path into the trie.
        for (vector<vector<string>>::iterator it = paths.begin(); it != paths.end(); ++it) {
            insertPath(root, *it);
        }

        // Pass 1 counts subtree signatures; pass 2 flags the repeated ones.
        dfs(root, 0, 0, true);
        dfs(root, 0, 0, false);

        // Gather the folders that were not flagged.
        vector<vector<string>> ans;
        vector<string> prefix;
        dfsFA(root, ans, prefix);
        return ans;
    }
};

Ypuyu

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
[H哈希] lc1948. 删除系统中的重复文件夹(字符串哈希+树的哈希+trie树上哈希+难题+周赛251_4)

文章目录1. 题目来源2. 题目解析1. 题目来源链接：5826. 删除系统中的重复文件夹2. 题目解析怎么说呢，当我做完前三道，发现最后一道题还没人 Ak 的时候，我就意识到这题不属于我了…首先树上知识学得很菜，本题应该是 trie树并配合字符串哈希去哈希 trie的子树，用来查找。主要思路如下（借鉴坑神思路）：建立字典树表示文件夹结构。对字典树每个子树进行哈希，统计相同子树出现的次数。删除重复的子树，输出答案即可。在此，坑神字典树建立也很讲究…双哈希值，删除标记，map<
复制链接

扫一扫