单词拆分问题

最新推荐文章于 2023-03-17 11:06:52 发布

MRHEEHEE

最新推荐文章于 2023-03-17 11:06:52 发布

阅读量145

点赞数

原文链接：https://leetcode-cn.com/problems/word-break/

版权

KMP算法 · 字典匹配 · 单词拼接 · 回溯法 · 预处理

关键词由CSDN通过智能技术生成

给你一个字符串 s 和一个字符串列表 wordDict 作为字典。请你判断是否可以利用字典中出现的单词拼接出 s 。

注意：不要求字典中出现的单词全部都使用，并且字典中的单词可以重复使用。

示例 1：

输入: s = "leetcode", wordDict = ["leet", "code"]
输出: true
解释: 返回 true 因为 "leetcode" 可以由 "leet" 和 "code" 拼接成。
示例 2：

输入: s = "applepenapple", wordDict = ["apple", "pen"]
输出: true
解释: 返回 true 因为 "applepenapple" 可以由 "apple" "pen" "apple" 拼接成。
注意，你可以重复使用字典中的单词。
示例 3：

输入: s = "catsandog", wordDict = ["cats", "dog", "sand", "and", "cat"]
输出: false

提示：

1 <= s.length <= 300
1 <= wordDict.length <= 1000
1 <= wordDict[i].length <= 20
s 和 wordDict[i] 仅有小写英文字母组成
wordDict 中的所有字符串互不相同

来源：力扣（LeetCode）
链接：https://leetcode-cn.com/problems/word-break

解题思路：

1. 对原字典进行预处理，得到一个新字典：
   （1）用 KMP 算法统计每个单词在主串中出现的所有位置；如果从未出现，则直接筛除。
   （2）按每个单词的出现位置，在主串上进行打点（单词覆盖范围内逐点打点）。
   （3）提前预存每个单词的长度，避免后续多次调用 strlen 函数（时间能省就省）。
2. 对新字典进行如下预处理：
   （1）统计每个字母出现的次数。
   （2）统计每个字母所在单词的个数；如果包含该字母的单词唯一，则将该单词加入提前占位的候选列表。
   （3）建立首字母相同的单词列表（便于快速查找）。
3. 对主串进行如下处理：
   （1）统计每个字母出现的次数。
4. 根据预处理结果，进行初步判断：
   （1）如果新字典中未包含主串中的某个字母，则直接返回 false。
   （2）根据第 1 步中的打点结果，如果每个位置上的打点数均为 1，则直接返回 true。
5. 根据提前占位候选列表，确定每个占位块的位置，之后对所有占位块按照位置进行升序排列（由于数量较少，冒泡排序即可）。
6. 根据占位块信息，将主串划分为若干个空闲区间，之后对空闲区间按长度进行升序排列（长度越短，越容易提前检查出问题）。
7. 用回溯法对空闲区间进行逐一处理。

#include <malloc.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Compact unsigned type used in this file for positions, lengths and counters. */
typedef unsigned short uint16;

/*
 * Build the optimized KMP failure table for pattern s.
 *
 * next      receives strlen(s) + 1 entries; next[0] is always -1. The caller
 *           must supply a buffer at least that large.
 * nextSize  receives the length of s.
 *
 * An entry is "collapsed" to the entry of its fallback position whenever both
 * positions hold the same character, so a mismatch never falls back to a
 * position that would mismatch again on the same character.
 */
void getNext(char *s, int *next, int *nextSize)
{
	int pos = 0;       /* index of the last table entry already filled */
	int border = -1;   /* fallback position currently being extended */

	next[0] = -1;
	while (s[pos] != '\0') {
		if (border != -1 && s[pos] != s[border]) {
			/* Cannot extend this border: retry with the next shorter one. */
			border = next[border];
			continue;
		}

		pos++;
		border++;
		next[pos] = (s[pos] == s[border]) ? next[border] : border;
	}

	*nextSize = pos;
}

/*
 * Find the first occurrence of pattern t in text s, scanning from index pos.
 *
 * On success returns true and stores the inclusive index range of the match
 * in *begin and *end. Returns false, leaving both untouched, when the text is
 * exhausted first.
 *
 * The failure table lives in a 1000-entry stack buffer and is rebuilt on
 * every call, so t must be shorter than 1000 characters.
 */
bool indexKMP(char *s, char *t, int *begin, int *end, int pos)
{
	int fail[1000];
	int patLen;
	int si = pos;   /* cursor in the text */
	int ti = 0;     /* cursor in the pattern */

	getNext(t, fail, &patLen);

	while (s[si] != '\0' && t[ti] != '\0') {
		if (s[si] == t[ti]) {
			si++;
			ti++;
			continue;
		}

		ti = fail[ti];
		if (ti < 0) {
			/* No border left: restart the pattern at the next text character. */
			si++;
			ti++;
		}
	}

	if (t[ti] != '\0')
		return false;

	*begin = si - patLen;
	*end = si - 1;
	return true;
}

/* Compile-time capacities for the fixed-size tables used in this file. */
#define MAX_ROW_SIZE  300        /* row width of the per-letter word index (firstLetterArr) */
#define MAX_COL_SIZE  50 
#define MAX_WORD_LIST_SIZE 40
#define MAX_QUEUE_SIZE 50        /* not referenced by the code visible here */
#define MAX_WORD_SIZE 50         /* not referenced by the code visible here */
#define MAX_MARK_BLOCK_NUM 20    /* first dimension in the bubbleSort1/bubbleSort2 parameter declarations */

/*
 * Decide whether s[curLen .. maxStrLen) can be written as a concatenation of
 * dictionary words, and set *flag to true if it can. *flag is never cleared;
 * when it is already true on entry the function returns immediately.
 *
 * s               text being segmented
 * maxStrLen       exclusive end index of the segment (must not exceed strlen(s))
 * wordDict        dictionary words
 * wordDictSize    number of words (unused here; kept for interface stability)
 * curLen          start index of the segment
 * firstLetterArr  for each letter, indices into wordDict of words starting with it
 * firstLetterCnt  number of valid entries in each firstLetterArr row
 * flag            in/out result
 * wordLen         length of each word in wordDict
 *
 * Implemented as a forward reachability scan: each offset is expanded at most
 * once, which avoids the exponential blow-up plain backtracking hits on inputs
 * with many overlapping words. Matches that would cross maxStrLen are ignored,
 * since they can never land exactly on the segment end.
 *
 * If the scratch buffer cannot be allocated, *flag is left unchanged.
 */
void tryWordBreak(char * s, uint16 maxStrLen, char ** wordDict, int wordDictSize, int curLen, uint16 firstLetterArr[26][MAX_ROW_SIZE], uint16 firstLetterCnt[26],  bool *flag, uint16 *wordLen)
{
	(void)wordDictSize;

	if (*flag)
		return;
	if (curLen < 0 || curLen > maxStrLen)
		return;
	if (curLen == maxStrLen) {
		*flag = true;
		return;
	}

	size_t span = (size_t)(maxStrLen - curLen);
	bool *reach = calloc(span + 1, sizeof *reach);   /* reach[k]: offset k is a word boundary */

	if (reach == NULL)
		return;

	reach[0] = true;
	for (size_t off = 0; off < span && !reach[span]; off++) {
		if (!reach[off])
			continue;

		const char *here = s + curLen + off;
		int letter = *here - 'a';

		/* Only lowercase letters have an index row. */
		if (letter < 0 || letter >= 26)
			continue;

		for (int i = 0; i < firstLetterCnt[letter]; i++) {
			int w = firstLetterArr[letter][i];
			size_t wl = wordLen[w];

			if (wl <= span - off && strncmp(here, wordDict[w], wl) == 0)
				reach[off + wl] = true;
		}
	}

	if (reach[span])
		*flag = true;

	free(reach);
}

/*
 * Sort n placeholder blocks in ascending order of start position.
 * Each row is {start, end}; rows are compared on column 0 only.
 * Bubble sort with early exit when a pass performs no swap.
 */
void bubbleSort1(uint16 a[MAX_MARK_BLOCK_NUM][2], uint16 n)
{
	int last = n - 1;       /* highest index still possibly out of place */
	bool swapped = true;

	while (swapped && last > 0) {
		swapped = false;

		for (int k = 0; k < last; k++) {
			if (a[k][0] <= a[k + 1][0])
				continue;

			uint16 start = a[k][0];
			uint16 finish = a[k][1];

			a[k][0] = a[k + 1][0];
			a[k][1] = a[k + 1][1];
			a[k + 1][0] = start;
			a[k + 1][1] = finish;
			swapped = true;
		}

		last--;
	}
}
/*
 * Sort n intervals in ascending order of length.
 * Each row is {start, end}; the sort key is end - start.
 * Bubble sort with early exit when a pass performs no swap.
 */
void bubbleSort2(uint16 a[MAX_MARK_BLOCK_NUM][2], uint16 n)
{
	int last = n - 1;       /* highest index still possibly out of place */
	bool swapped = true;

	while (swapped && last > 0) {
		swapped = false;

		for (int k = 0; k < last; k++) {
			int lenHere = a[k][1] - a[k][0];
			int lenNext = a[k + 1][1] - a[k + 1][0];

			if (lenHere <= lenNext)
				continue;

			uint16 start = a[k][0];
			uint16 finish = a[k][1];

			a[k][0] = a[k + 1][0];
			a[k][1] = a[k + 1][1];
			a[k + 1][0] = start;
			a[k + 1][1] = finish;
			swapped = true;
		}

		last--;
	}
}

bool wordBreak(char * s, char **oldwordDict, int oldwordDictSize){
		
	int i,j;
	int tmpIdx, tmpLen;

	uint16 firstLetterCnt[26]={0};
	uint16 count1[26]={0};                      //主串中字母出现计数 
	uint16 count2[26]={0};                      //字典中字母出现计数 
	uint16 firstLetterArr[26][MAX_ROW_SIZE];    //首字母索引表 
	
	uint16 candidates[MAX_WORD_LIST_SIZE];      //提前占位的候选单词列表
	uint16 candidatesLen=0;
	
	bool candidatesFlag[MAX_WORD_LIST_SIZE]={0};      
	
	uint16 letterParentCnt[26]={0};           //字母所在单词计数 
	uint16 letterParent[26][MAX_ROW_SIZE];
	
	uint16 posArrCnt[MAX_ROW_SIZE]={0};  //每个位置的单词计数
	
	uint16 maxStrLen=strlen(s);  //主串长度，提前预存，避免多次调用strlen函数
	uint16 wordLen[MAX_WORD_LIST_SIZE]={0}; //每个单词长度，提前预存，避免多次调用strlen函数

        uint16 wordsPosCnt[MAX_WORD_LIST_SIZE]={0}; //每个单词在主串中出现次数
        uint16 **wordsPos; //每个单词的出现位置

	char ** wordDict;   //过滤掉无用单词后的新词典
	int wordDictSize=0;
	wordDict=(char **)malloc(MAX_WORD_LIST_SIZE *sizeof(char *));

        wordsPos = (uint16 **)malloc(MAX_WORD_LIST_SIZE * sizeof(uint16 **));
	for(int i=0; i<MAX_WORD_LIST_SIZE; i++)
		wordsPos[i] = (uint16 *)malloc(MAX_COL_SIZE*sizeof(uint16 *));	
		

	
	int cnt;
	int pos;
	int begin;
	int end; 
	int maxLen;
	int remainLen=strlen(s);
	int len=0;

       //
       //搜索每个单词在主串上出现的位置，过滤掉不会出现的单词
       // 
	bool flag = true;
	for(int i=0;i<oldwordDictSize;i++)
	{
	    pos=0;
		begin=0;
		end=0;
		while(indexKMP(s, oldwordDict[i], &begin, &end, pos))
		{
			if(!wordsPosCnt[len] && flag)
			{
				wordLen[len] = end - begin + 1;
				wordDict[len] = (char *)malloc((strlen(oldwordDict[i])+1)*sizeof(char));
				wordDictSize++;
				len++;
				flag = false;
			}
			
			wordsPos[len-1][wordsPosCnt[len-1]++] = begin;
			
			strcpy(wordDict[len-1], oldwordDict[i]);
			
			pos=begin+1;

			for(int j=begin; j<=end; j++)   //在主串上打点
				posArrCnt[j]++;
		}
		
		flag = true;

    }
    

    //判断主串上每个位置上的打点记数是否唯一，如果是，则直接返回true。
    int count=0;
	for(int i=0;i<strlen(s);i++)
	{
		if(posArrCnt[i]==1)
			count++;
	}

	if(count==strlen(s))
	{
		return true;
	}

       //统计主串中的字母出现次数
	for(i=0;s[i]!='\0';i++)
	{
		count1[s[i]-'a']++; 
	}
	
       //处理字典，1.建立首字母单词索引表  2.统计所有字母的出现次数   3. 统计每个字母所在的单词个数
	for(i=0;i<wordDictSize;i++)
	{
		bool tmpCount[26]={0};
		tmpIdx = wordDict[i][0]-'a';
		firstLetterArr[tmpIdx][firstLetterCnt[tmpIdx]++] = i;
		for(j=0; wordDict[i][j]!='\0';j++)
		{
			tmpCount[wordDict[i][j]-'a']++;
			count2[wordDict[i][j]-'a']++;	
		}
		
		for(j=0;j<26;j++)
		{
			if(tmpCount[j])
			{ 
				letterParent[j][letterParentCnt[j]++] = i;
			}
		}
        
	}

   	for(i=0;i<26;i++) //如果字典中不包含主串的字母，则直接返回false
	{
		if(count1[i] && !count2[i]) 
			return false;
	}
	
    //
    // 处理能在主串上提前占位的单词
    // 候选条件是：包含某字母的单词是唯一的，并且主串中包含该字母
   //
    for(i=0;i<26;i++)
	{
		if(letterParentCnt[i]==1 && count1[i])    
		{
			if(!candidatesFlag[letterParent[i][0]])
			{
				candidates[candidatesLen++]=letterParent[i][0];
				candidatesFlag[letterParent[i][0]]=1;
			}
	
		}	 
	}

    //
    // 根据候选单词确定主串上的占位块
    //
	uint16 markBlockArr[MAX_MARK_BLOCK_NUM][2];  //0:起点  1：终点
	uint16 markBlockNum=0; 
	
	markBlockArr[0][1]=maxStrLen;
	
	for(i=0;i<candidatesLen;i++)
	{
		int tmp=candidates[i];
		
		for(int j=0; j<wordsPosCnt[tmp]; j++)
		{
			markBlockArr[markBlockNum][0] = wordsPos[tmp][j];
			markBlockArr[markBlockNum][1] = wordsPos[tmp][j] + wordLen[tmp];
			markBlockNum++;
					
		}
	}

    // 将所有占位块按照在主串上的位置进行升序排列
	if(markBlockNum>1)	
		bubbleSort1(markBlockArr,markBlockNum);

    // 根据占位块，将主串分割为若干空闲区间
	uint16 vacantSectionArr[MAX_MARK_BLOCK_NUM][2];
	uint16 vacantSectionNum=0;
		
	if(markBlockNum >0)
	{
		int k=0;
		int startPos=0;
		
		for(i=0; startPos<maxStrLen && i<markBlockNum; i++)
		{
			if(markBlockArr[i][0] > startPos)
			{
				vacantSectionArr[k][0] = startPos;
				vacantSectionArr[k][1] = markBlockArr[i][0];
				startPos = markBlockArr[i][1];
				k++;
			}
			else if(markBlockArr[i][0] == startPos)
			{
				startPos = markBlockArr[i][1];
			}
		}
		
		if(startPos < maxStrLen)
		{
			vacantSectionArr[k][0] = startPos;
			vacantSectionArr[k][1] = maxStrLen;
			k++; 
		}
		
		vacantSectionNum=k;	
    }
    else
	{
		vacantSectionArr[0][0] = 0;
		vacantSectionArr[0][1] = maxStrLen; 
		vacantSectionNum = 1;
	}
	
    //将空闲区间按照长度进行升序排列
	if(vacantSectionNum > 1)
		bubbleSort2(vacantSectionArr, vacantSectionNum);


    //用回溯法逐一处理每个空闲区间
	for(i=0;i<vacantSectionNum;i++)
	{
    	flag = false;
	
		int curLen=vacantSectionArr[i][0];
		int maxLen=vacantSectionArr[i][1];
	
		tryWordBreak(s, maxLen, wordDict, wordDictSize, curLen, firstLetterArr, firstLetterCnt,  &flag, wordLen);

		if(!flag)
			break;
	}
	
	if(i==vacantSectionNum)
		return true;
	return flag;
}

MRHEEHEE

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
单词拆分问题

给你一个字符串 s 和一个字符串列表 wordDict 作为字典。请你判断是否可以利用字典中出现的单词拼接出 s 。注意：不要求字典中出现的单词全部都使用，并且字典中的单词可以重复使用。示例 1：输入: s = "leetcode", wordDict = ["leet", "code"]输出: true解释: 返回 true 因为 "leetcode" 可以由 "leet" 和 "code" 拼接成。示例 2：输入: s = "applepenapple", wordDict =
复制链接

扫一扫