查找给定字符串数组全组合的子串

最新推荐文章于 2024-07-19 19:11:19 发布

小小鸟的天空

最新推荐文章于 2024-07-19 19:11:19 发布

阅读量108

点赞数

分类专栏：基础算法-数据结构练习 文章标签：c语言 算法 leetcode 数据结构 Powered by 金山文档

本文链接：https://blog.csdn.net/skybirdcao/article/details/129654695

版权

基础算法-数据结构练习专栏收录该内容

22 篇文章 0 订阅

订阅专栏

给定一个字符串s和一个字符串数组words，找出所有串联子串的起始索引，这些子串由words中字符串按任意顺序排列连接而成。例如，输入s=barfoothefoobarman，words=[foo,bar]，输出为[0,9]。代码实现包括排序、匹配和检查匹配过程。

摘要由CSDN通过智能技术生成


/*
给定一个字符串 s 和一个字符串数组 words。 words 中所有字符串 长度相同。

 s 中的 串联子串 是指一个包含  words 中所有字符串以任意顺序排列连接起来的子串。

例如，如果 words = ["ab","cd","ef"]， 那么 "abcdef"， "abefcd"，"cdabef"， "cdefab"，"efabcd"， 和 "efcdab" 都是串联子串。 "acdbef" 不是串联子串，因为它不是任何 words 排列的连接。
返回所有串联子串在 s 中的开始索引。你可以以 任意顺序 返回答案。

 

示例 1：

输入：s = "barfoothefoobarman", words = ["foo","bar"]
输出：[0,9]
解释：因为 words.length == 2 同时 words[i].length == 3，连接的子字符串的长度必须为 6。
子串 "barfoo" 开始位置是 0。它是 words 中以 ["bar","foo"] 顺序排列的连接。
子串 "foobar" 开始位置是 9。它是 words 中以 ["foo","bar"] 顺序排列的连接。
输出顺序无关紧要。返回 [9,0] 也是可以的。
示例 2：

输入：s = "wordgoodgoodgoodbestword", words = ["word","good","best","word"]
输出：[]
解释：因为 words.length == 4 并且 words[i].length == 4，所以串联子串的长度必须为 16。
s 中没有子串长度为 16 并且等于 words 的任何顺序排列的连接。
所以我们返回一个空数组。
示例 3：

输入：s = "barfoofoobarthefoobarman", words = ["bar","foo","the"]
输出：[6,9,12]
解释：因为 words.length == 3 并且 words[i].length == 3，所以串联子串的长度必须为 9。
子串 "foobarthe" 开始位置是 6。它是 words 中以 ["foo","bar","the"] 顺序排列的连接。
子串 "barthefoo" 开始位置是 9。它是 words 中以 ["bar","the","foo"] 顺序排列的连接。
子串 "thefoobar" 开始位置是 12。它是 words 中以 ["the","foo","bar"] 顺序排列的连接。
 

提示：

1 <= s.length <= 10^4
1 <= words.length <= 5000
1 <= words[i].length <= 30
words[i] 和 s 由小写英文字母组成

来源：力扣（LeetCode）
链接：https://leetcode.cn/problems/substring-with-concatenation-of-all-words
著作权归领扣网络所有。商业转载请联系官方授权，非商业转载请注明出处。

*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

/* Capacity, in bytes, of the word buffer embedded in struct words_info. */
#define MAX_LEN 32

/*
 * One distinct word from the input word list together with the number of
 * times that word occurs in the list.
 */
struct words_info {
    char word[MAX_LEN];   /* word bytes; compared with strncmp over the word length, not guaranteed to be NUL-terminated */
    short int times;      /* number of occurrences of this word in the input list */
};

/*
 * Build a sorted table of the distinct words in `words`, counting duplicates.
 *
 * Every input word is looked up in the table built so far with a binary
 * search (strncmp over the first one_len bytes).  If it is already present
 * its `times` counter is incremented; otherwise it is inserted at its sorted
 * position with `times` set to 1.
 *
 * words_inf  : output table; the caller must provide room for words_size
 *              entries (the worst case, when every word is distinct).
 * words      : input words.
 * words_size : number of entries in `words`.
 * one_len    : number of bytes of each word that are compared and stored;
 *              it must not exceed MAX_LEN, the capacity of words_info.word.
 *
 * Returns the number of distinct words written to words_inf.
 */
static int sort_words_info(struct words_info words_inf[], const char **words, int words_size, int one_len)
{
    int count = 0;

    for (int i = 0; i < words_size; i++) {
        int lo = 0;
        int hi = count - 1;
        bool seen = false;

        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;
            int cmp = strncmp(words[i], words_inf[mid].word, one_len);

            if (cmp > 0) {
                lo = mid + 1;
            } else if (cmp < 0) {
                hi = mid - 1;
            } else {
                /* Duplicate of an entry that is already in the table. */
                words_inf[mid].times++;
                seen = true;
                break;
            }
        }

        if (seen) {
            continue;
        }

        /* The search ended with lo at the insertion point: open a gap there. */
        if (lo < count) {
            memmove(&words_inf[lo + 1], &words_inf[lo],
                    sizeof words_inf[0] * (size_t)(count - lo));
        }

        /*
         * Clear the slot before filling it so the stored key carries no
         * indeterminate bytes and is NUL-terminated whenever
         * one_len < MAX_LEN.  strncpy is used on purpose: `word` is a
         * fixed-width field and at most one_len bytes must be copied.
         */
        memset(&words_inf[lo], 0, sizeof words_inf[lo]);
        strncpy(words_inf[lo].word, words[i], one_len);
        words_inf[lo].times = 1;
        count++;
    }

    return count;
}

/*
 * Look up the one_len bytes starting at `word` in the sorted word table.
 *
 * word        : start of the candidate text.
 * one_len     : number of bytes compared against each table entry.
 * words_inf   : table to search; the binary search relies on it being
 *               ordered consistently with strncmp over one_len bytes.
 * words_count : number of entries in words_inf.
 *
 * Returns the index of the matching entry, or -1 when there is none.
 */
static short int match_word(const char *word, int one_len, struct words_info words_inf[], int words_count)
{
    int lo = 0;
    int hi = words_count - 1;

    while (lo <= hi) {
        const int mid = lo + (hi - lo) / 2;
        const int cmp = strncmp(word, words_inf[mid].word, one_len);

        if (cmp == 0) {
            return (short int)mid;
        }

        if (cmp > 0) {
            lo = mid + 1;
        } else {
            hi = mid - 1;
        }
    }

    return -1;
}

/*
 * Decide whether the text starting at offset `from` is a concatenation of
 * all words, each used exactly as often as recorded in words_inf.
 *
 * match_times : per-word tally, one slot per entry of words_inf; this
 *               function only increments it, so the caller is expected to
 *               pass it zeroed.
 * matchs      : for every text offset, the table index of the word that
 *               starts there, or -1 when no word starts there.
 * from        : offset of the first word slot to examine.
 * check_count : number of consecutive word slots that must all match.
 * total_len   : number of entries in matchs.
 * one_len     : distance between consecutive word slots.
 * words_inf   : word table providing the expected count of each word.
 * words_count : number of entries in words_inf and match_times.
 *
 * Returns true when all check_count slots hold a word and the tallies equal
 * the expected counts; false otherwise.
 */
static bool check_match(short match_times[], short matchs[], int from, int check_count, int total_len, int one_len,
                struct words_info words_inf[], int words_count)
{
    int slots_seen = 0;
    int pos = from;

    /* Tally the word found in each slot, stopping at the first empty slot. */
    while (slots_seen < check_count && pos < total_len) {
        const short word_idx = matchs[pos];

        if (word_idx == -1) {
            break;
        }

        match_times[word_idx]++;
        slots_seen++;
        pos += one_len;
    }

    /* An empty slot or running off the end of the text means no match. */
    if (slots_seen != check_count) {
        return false;
    }

    /* Every slot held a word: the tallies must equal the expected counts. */
    for (int k = 0; k < words_count; k++) {
        if (match_times[k] != words_inf[k].times) {
            return false;
        }
    }

    return true;
}

/**
 * Note: The returned array must be malloced, assume caller calls free().
 */
int* findSubstring(char * s, char ** words, int wordsSize, int* returnSize)
{
    int total_len = strlen(s);
    int one_len = strlen(words[0]);
    int i = 0, j = 0;

    *returnSize = 0;
    if (total_len < (one_len * wordsSize)){
        return NULL;
    }

    struct words_info *words_inf = (struct words_info *)malloc(sizeof(struct words_info) * wordsSize);
    int words_count = sort_words_info(words_inf,(const char **)words, wordsSize, one_len);

    short int *matchs = (short int *)malloc(sizeof(short int) * total_len);

    for (i = 0; i < total_len; i++) {
        matchs[i] = match_word(s + i, one_len, words_inf, words_count);
    }

    short int *match_times = (short int *)malloc(sizeof(short int) * words_count);
    int *match_pos = (int *)malloc(sizeof(int) * total_len);

    for (i = 0; i < total_len; i++) {
        memset(match_times, 0, sizeof(short int) * words_count);
        if (check_match(match_times, matchs, i, wordsSize, total_len, one_len, words_inf, words_count)) {
            match_pos[j++] = i;
        }
    }

    free(match_times);
    free(matchs);
    free(words_inf);

    if (j == 0) {
        free(match_pos);
        return NULL;
    }

    *returnSize = j;
    return match_pos;
}

/*
 * Demo driver: runs findSubstring on a sample input and prints the start
 * indices it returns as "[i j ... ]".
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    char *s = "wordgoodgoodgoodbestword";
    char *words[] = {"word", "good", "best", "word"};

    /*
     * Alternative sample from the problem statement (indices 6, 9 and 12):
     *   s     = "barfoofoobarthefoobarman"
     *   words = {"bar", "foo", "the"}
     */

    /* Derive the count from the array so it cannot drift from the sample. */
    int words_size = (int)(sizeof words / sizeof words[0]);
    int size = 0;
    int *ret = findSubstring(s, words, words_size, &size);

    printf("[");
    for (int i = 0; i < size; i++) {
        printf("%d ", ret[i]);
    }
    printf("]\n");

    free(ret);   /* ret may be NULL when nothing matched; free(NULL) is a no-op */
    return 0;
}

小小鸟的天空

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
查找给定字符串数组全组合的子串

给定一个字符串s和一个字符串数组words。words中所有字符串长度相同。s中的串联子串是指一个包含words中所有字符串以任意顺序排列连接起来的子串。例如，如果words = ["ab","cd","ef"]，那么"abcdef"，"abefcd"，"cdabef"，"cdefab"，"efabcd"，和"efcdab" 都是串联子串。"acdbef" 不是串联子串，因为它不是任何words排列的连接。返回所有串联子串在s中的开始索引。你可以以任意顺序返回答案。
复制链接

扫一扫