最常见的程序员面试题(10)字符串的处理

最新推荐文章于 2022-11-04 23:58:48 发布

zealot_2002

最新推荐文章于 2022-11-04 23:58:48 发布

阅读量741

点赞数

分类专栏：数据结构&算法

数据结构&算法专栏收录该内容

26 篇文章 0 订阅

订阅专栏

字符串的搜索，匹配，查找，压缩，编码/解码，是一类非常常见的问题。

(1) 压缩一个字符串当中的空格,例如把" I like csdn "压缩成"I like csdn"。注意，单词之间的空格，N个空格要压缩到1个，也就是要减少N-1个空格。行首/行尾的空格全部去掉。
算法复杂度O(n)，扫描一遍，在扫描的过程中记住当前已经有了多少个多余的空格(pos)，然后把非空格的字符都向前移动pos个字符。处理完了整个字符串以后再从行尾往前扫描到第一个非空格字符，在其后面填上'\0'，工作完成。

[cpp] view plain copy print ?

#include<string>
using namespace std;
/**
 * Compresses the spaces of a NUL-terminated string in place.
 *
 * Leading and trailing spaces are removed and every run of spaces between
 * two words is reduced to a single space. Only the character ' ' is treated
 * as a space. The result is never longer than the input, so it is written
 * back into the same buffer.
 *
 * @param str in/out buffer; a null pointer is ignored.
 */
void f(char* str){
    if(str==nullptr)return;

    char* out = str;            // next write position; never ahead of the reader
    bool pendingSpace = false;  // one separator is owed before the next word
    for(const char* in = str; *in != '\0'; ++in){
        if(*in == ' '){
            // A separator is only owed once some word has been written,
            // which is what drops the leading spaces.
            pendingSpace = (out != str);
        }else{
            if(pendingSpace){
                *out++ = ' ';
                pendingSpace = false;
            }
            *out++ = *in;
        }
    }
    // A separator still pending here belongs to trailing spaces and is dropped.
    *out = '\0';
}
/** Demo driver: compresses a sample string and prints it with its new length. */
int main(){
    char buf[]=" I like csdn ";
    f(buf);// compress the spaces in place
    // strlen() returns size_t, which must be printed with %zu rather than %d
    printf("%s,%zu\n",buf,strlen(buf));
    return 0;
}

#include<string>
using namespace std;
/**
 * Compresses the spaces of a NUL-terminated string in place.
 *
 * Leading and trailing spaces are removed and every run of spaces between
 * two words is reduced to a single space. Only the character ' ' is treated
 * as a space. The result is never longer than the input, so it is written
 * back into the same buffer.
 *
 * @param str in/out buffer; a null pointer is ignored.
 */
void f(char* str){
	if(str==nullptr)return;

	char* out = str;            // next write position; never ahead of the reader
	bool pendingSpace = false;  // one separator is owed before the next word
	for(const char* in = str; *in != '\0'; ++in){
		if(*in == ' '){
			// A separator is only owed once some word has been written,
			// which is what drops the leading spaces.
			pendingSpace = (out != str);
		}else{
			if(pendingSpace){
				*out++ = ' ';
				pendingSpace = false;
			}
			*out++ = *in;
		}
	}
	// A separator still pending here belongs to trailing spaces and is dropped.
	*out = '\0';
}
/** Demo driver: compresses a sample string and prints it with its new length. */
int main(){
	char buf[]=" I  like   csdn    ";
	f(buf);// compress the spaces in place
	// strlen() returns size_t, which must be printed with %zu rather than %d
	printf("%s,%zu\n",buf,strlen(buf));
	return 0;
}

(2) 汉字编码的题目，例如输入一个数字串"00001204567809001"输出"一万两千零四十五亿六千七百八十万九千零一"
分析: 可以分步骤进行，先对字符串进行错误处理，然后遍历前面的0，找到真正的数字开始的地方。
计算的过程需要将整个字符串分段，8个字符一段(亿)，其中又要再次分为两段(万)，然后对于4位数进行编码

[cpp] view plain copy print ?

#include"stdafx.h"
#include <iostream>
#include <cassert>
#include <cstring>
#include <string>
#include <vector>
using namespace std;
// Unit string that addyi() appends after a section.
const char yi[] = "亿";
// Unit string that process() appends after the upper digit group of a section.
const char wan[] = "万";
// Positional unit strings used by processNdigits(): index 0, 1, 2 hold the
// units for the tens, hundreds and thousands positions respectively.
const char* table[] ={
"十","百","千",
};
// Numeral strings indexed by decimal digit value (0..9).
const char* digits[] ={
"零","一","二","三","四","五","六","七","八","九"
};
/**
 * Validates the input and skips its leading zeros.
 *
 * @param psz NUL-terminated string of decimal digits; may be null.
 * @return pointer into psz at its first non-'0' digit, or nullptr when psz
 *         is null, empty, contains a character other than '0'..'9', or
 *         consists of zeros only.
 */
char* preprocess(char* psz){
    if(psz==nullptr||*psz=='\0')return nullptr;

    // Reject anything that is not a decimal digit: later stages index the
    // numeral table with (c-'0').
    for(const char* p=psz;*p!='\0';++p){
        if(*p<'0'||*p>'9')return nullptr;
    }

    char* first=psz;
    while(*first=='0')++first;          // skip the leading zeros
    if(*first=='\0')return nullptr;     // nothing but zeros
    return first;
}
/**
 * Adds up the first len characters of psz.
 *
 * The raw character values are added, not the digit values they spell, so
 * a run of '0' characters sums to len times the code of '0' rather than 0.
 *
 * @param psz characters to add; at least len of them must be readable.
 * @param len number of characters to add.
 * @return the accumulated total.
 */
size_t sum(const char*psz, size_t len){
    size_t total=0;
    while(len-- > 0){
        total+=*psz++;
    }
    return total;
}
// Output text shared by the conversion helpers in this file.
string sRet="";
/**
 * Appends the yi unit string to sRet len times.
 *
 * @param len number of copies to append; 0 appends nothing.
 */
void addyi(size_t len){
    size_t remaining=len;
    while(remaining!=0){
        sRet.append(yi);
        --remaining;
    }
}
/**
 * Appends the numeral text of a group of at most four digits to sRet.
 *
 * Every non-zero digit is written as its numeral followed by its positional
 * unit (none for the last position). A run of zeros is written as a single
 * "零", and only when a non-zero digit follows it inside the group, so
 * trailing zeros and an all-zero group add nothing.
 *
 * @param psz pointer to N characters, each expected to be '0'..'9'.
 * @param N   number of digits in the group; must be <= 4.
 */
void processNdigits(const char*psz,size_t N){
    assert(N<=4);
    bool pendingZero=false;// zeros seen since the last emitted digit
    for(size_t s=0;s<N;++s){
        if(psz[s]=='0'){
            // Defer the "零": it is only wanted if a non-zero digit follows.
            pendingZero=true;
            continue;
        }
        if(pendingZero){
            sRet+=digits[0];
            pendingZero=false;
        }
        sRet+=digits[psz[s]-'0'];
        const size_t toTheRight=N-1-s;// digits remaining after this one
        if(toTheRight>0){
            sRet+=table[toTheRight-1];// tens / hundreds / thousands unit
        }
    }
}
/** Convenience wrapper: forwards psz to processNdigits() as a full group of four digits. */
void process4digits(const char*psz){
    const size_t kGroupDigits=4;
    processNdigits(psz,kGroupDigits);
}
void process(char* psz){//处理算法
char* s = preprocess(psz);
if(s==nullptr){
sRet="空";
return;
}
size_t len = strlen(s);
size_t nSection = len>>3; //按照"亿"分段
char* pSection = s;
for(size_t n=0;n<=nSection;++n){ //处理每个分段
//首先计算当前段的长度和起始指针。除了第一个段长度可能比8少以外，其他都是8个字节
size_t nSectionLen=(n==0)?(len%8):8;//当前段的长度
if(nSectionLen==0)continue;
//每个分段划分成前4个数字(万位)和后4个数字
else if(nSectionLen==8){
if(sum(pSection,4)==0){//如果万位全0，那么需要给万位自身输出一个'零'
pSection+=4;
sRet+=table[0];//输出'零'
}else{//处理万位的4个数字并加上万
process4digits(pSection);
sRet+=wan;
}
pSection+=4;
process4digits(pSection);//处理千位到个位的4个数字
}else{//开始的一段数字，不够8位
if(nSectionLen>4){
size_t leadingLen = nSectionLen-4;
processNdigits(pSection,leadingLen);
nSectionLen=4;
pSection+=leadingLen;
sRet+=wan;
}
processNdigits(pSection,nSectionLen);
pSection+=nSectionLen;
}
addyi(nSection-n);
}
cout<<sRet<<endl;
}
/** Demo driver: converts one sample digit string that carries leading zeros. */
int main()
{
    char sample[]="00001204567809001";
    process(sample);
    return 0;
}

#include"stdafx.h"
#include <iostream>
#include <cassert>
#include <cstring>
#include <string>
#include <vector>
using namespace std;

// Unit string that addyi() appends after a section.
const char yi[]  = "亿";
// Unit string that process() appends after the upper digit group of a section.
const char wan[] = "万";
// Positional unit strings used by processNdigits(): index 0, 1, 2 hold the
// units for the tens, hundreds and thousands positions respectively.
const char* table[] ={
    "十","百","千",
};
// Numeral strings indexed by decimal digit value (0..9).
const char* digits[] ={
    "零","一","二","三","四","五","六","七","八","九"
};
/**
 * Validates the input and skips its leading zeros.
 *
 * @param psz NUL-terminated string of decimal digits; may be null.
 * @return pointer into psz at its first non-'0' digit, or nullptr when psz
 *         is null, empty, contains a character other than '0'..'9', or
 *         consists of zeros only.
 */
char* preprocess(char* psz){
    if(psz==nullptr||*psz=='\0')return nullptr;

    // Reject anything that is not a decimal digit: later stages index the
    // numeral table with (c-'0').
    for(const char* p=psz;*p!='\0';++p){
        if(*p<'0'||*p>'9')return nullptr;
    }

    char* first=psz;
    while(*first=='0')++first;          // skip the leading zeros
    if(*first=='\0')return nullptr;     // nothing but zeros
    return first;
}
/**
 * Adds up the first len characters of psz.
 *
 * The raw character values are added, not the digit values they spell, so
 * a run of '0' characters sums to len times the code of '0' rather than 0.
 *
 * @param psz characters to add; at least len of them must be readable.
 * @param len number of characters to add.
 * @return the accumulated total.
 */
size_t sum(const char*psz, size_t len){
    size_t total=0;
    while(len-- > 0){
        total+=*psz++;
    }
    return total;
}
// Output text shared by the conversion helpers in this file.
string sRet="";
/**
 * Appends the yi unit string to sRet len times.
 *
 * @param len number of copies to append; 0 appends nothing.
 */
void addyi(size_t len){
    size_t remaining=len;
    while(remaining!=0){
        sRet.append(yi);
        --remaining;
    }
}
/**
 * Appends the numeral text of a group of at most four digits to sRet.
 *
 * Every non-zero digit is written as its numeral followed by its positional
 * unit (none for the last position). A run of zeros is written as a single
 * "零", and only when a non-zero digit follows it inside the group, so
 * trailing zeros and an all-zero group add nothing.
 *
 * @param psz pointer to N characters, each expected to be '0'..'9'.
 * @param N   number of digits in the group; must be <= 4.
 */
void processNdigits(const char*psz,size_t N){
    assert(N<=4);
    bool pendingZero=false;// zeros seen since the last emitted digit
    for(size_t s=0;s<N;++s){
        if(psz[s]=='0'){
            // Defer the "零": it is only wanted if a non-zero digit follows.
            pendingZero=true;
            continue;
        }
        if(pendingZero){
            sRet+=digits[0];
            pendingZero=false;
        }
        sRet+=digits[psz[s]-'0'];
        const size_t toTheRight=N-1-s;// digits remaining after this one
        if(toTheRight>0){
            sRet+=table[toTheRight-1];// tens / hundreds / thousands unit
        }
    }
}
/** Convenience wrapper: forwards psz to processNdigits() as a full group of four digits. */
void process4digits(const char*psz){
    const size_t kGroupDigits=4;
    processNdigits(psz,kGroupDigits);
}
void process(char* psz){//处理算法
    char* s = preprocess(psz);
    if(s==nullptr){
		sRet="空";
        return;
	}

    size_t len = strlen(s);
    size_t nSection = len>>3; //按照"亿"分段
	char* pSection = s;
    for(size_t n=0;n<=nSection;++n){ //处理每个分段
        //首先计算当前段的长度和起始指针。除了第一个段长度可能比8少以外，其他都是8个字节
        size_t nSectionLen=(n==0)?(len%8):8;//当前段的长度
		if(nSectionLen==0)continue;

        //每个分段划分成前4个数字(万位)和后4个数字
		else if(nSectionLen==8){
			if(sum(pSection,4)==0){//如果万位全0，那么需要给万位自身输出一个'零'
				pSection+=4;
				sRet+=table[0];//输出'零'
			}else{//处理万位的4个数字并加上万
				process4digits(pSection);
				sRet+=wan;
			}

			pSection+=4;
			process4digits(pSection);//处理千位到个位的4个数字
		}else{//开始的一段数字，不够8位
			if(nSectionLen>4){
				size_t leadingLen = nSectionLen-4;
				processNdigits(pSection,leadingLen);
				nSectionLen=4;
				pSection+=leadingLen;
				sRet+=wan;
			}
			processNdigits(pSection,nSectionLen);
			pSection+=nSectionLen;
		}
        addyi(nSection-n);
    }
	cout<<sRet<<endl;
}

/** Demo driver: converts one sample digit string that carries leading zeros. */
int main()
{
    char sample[]="00001204567809001";
    process(sample);
    return 0;
}

(3) 字符串编码/反编码的问题。游程编码是一类经典的问题。

[cpp] view plain copy print ?

#include"stdafx.h"
#include<Windows.h>
#include<iostream>
#include<vector>
using namespace std;
/*
* 游程编码的问题:
* 游程编码的输入可能是一个二进制串或者字符串，长度未知(因为可能很长)
* 需要用"[内容-长度]对"的方式对其进行编码。例如0000111222编码成041323
* 也就是0有4个，1有3个，2有3个，编码成041323
* 需要注意的问题是:
* (1)如果只有一个字符/一种字符的情况
* (2)结束的情况
* (3)重复个数很多的情况: 因为我们用固定的"一个字节"来表示长度
* 因此每个"[内容-长度]对"表示最大长度就是255字节。
* 更多的重复要用多个"[内容-长度]对"来表示
*/
int main()
{
char arr[]="0000111222";
struct encode{
char c;
BYTE length;
encode(char _c,BYTE _length):
c(_c),length(_length)
{}
};
vector<encode> vEncode;
char* psz=arr;
char cCurrent = '\0';
size_t nCount = 1;
while(*psz!='\0'){
cCurrent=*psz;
if(nCount==255){
vEncode.push_back(encode(cCurrent,(BYTE)nCount));
nCount=1;
}
if(* ++psz ==cCurrent){
++nCount;
continue;
}else{
vEncode.push_back(encode(cCurrent,(BYTE)nCount));
nCount=1;
}
}
for( auto it = vEncode.begin(); it != vEncode.end(); ++ it ){
cout<<it->c<<(int)it->length<<endl;
}
return 0;
}

#include"stdafx.h"
#include<Windows.h>
#include<iostream>
#include<vector>
using namespace std;
/*
 * 游程编码的问题:
 * 游程编码的输入可能是一个二进制串或者字符串，长度未知(因为可能很长)
 * 需要用"[内容-长度]对"的方式对其进行编码。例如0000111222编码成041323
 * 也就是0有4个，1有3个，2有3个，编码成041323
 * 需要注意的问题是:
 * (1)如果只有一个字符/一种字符的情况
 * (2)结束的情况
 * (3)重复个数很多的情况: 因为我们用固定的"一个字节"来表示长度
 * 因此每个"[内容-长度]对"表示最大长度就是255字节。
 * 更多的重复要用多个"[内容-长度]对"来表示
 */
int main()
{
    char arr[]="0000111222";
	struct encode{
		char c;
		BYTE length;
		encode(char _c,BYTE _length):
			c(_c),length(_length)
		{}
	};
	vector<encode> vEncode;
	char* psz=arr;
	char cCurrent = '\0';
	size_t nCount = 1;
	while(*psz!='\0'){
		cCurrent=*psz;
		if(nCount==255){
			vEncode.push_back(encode(cCurrent,(BYTE)nCount));
			nCount=1;
		}
		if(* ++psz ==cCurrent){
			++nCount;
			continue;
		}else{
			vEncode.push_back(encode(cCurrent,(BYTE)nCount));
			nCount=1;
		}
	}
	for( auto it = vEncode.begin(); it != vEncode.end(); ++ it ){
		cout<<it->c<<(int)it->length<<endl;
	}
    return 0;
}

zealot_2002

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
最常见的程序员面试题(10)字符串的处理

字符串的搜索，匹配，查找，压缩，编码/解码，是一类非常常见的问题。(1) 压缩一个字符串当中的空格,例如把" I like csdn "压缩成"I like csdn"。注意，单词之间的空格，N个空格要压缩到1个，也就是要减少N-1个空格。行首/行尾的空格全部去掉。算法复杂度O(n)，扫描一遍，在扫描的过程中记住当前已经有了多少个多余的空格(pos)，然后把非空格的字
复制链接

扫一扫