文本分割为不同单词

将字符串分割成互不相同的单词,并按字典序输出

纯STL + iterator + set 版

#include <iostream>
#include <string>
#include <set>
#include <vector>
#include <algorithm>
using namespace std;

/**
 * Splits `str` into words and stores every distinct, non-empty word in `ans`.
 *
 * A word is a maximal run of characters that does not contain any character
 * of `separator`. Both the beginning and the end of `str` act as word
 * boundaries, so a string without any separator yields itself as one word
 * and a word that follows the last separator is not lost.
 *
 * @param str        text to split; it is not modified
 * @param ans        receives the words; entries already present are kept
 * @param separator  set of single-character delimiters; when empty, the
 *                   whole of `str` is treated as one word
 */
void splitString(const string& str, set<string>& ans, const string& separator)
{
	string::const_iterator word_begin = str.begin();

	while (word_begin != str.end())
	{
		// First delimiter at or after word_begin, or str.end() if there is none.
		string::const_iterator word_end =
			find_first_of(word_begin, str.end(), separator.begin(), separator.end());

		// Consecutive delimiters produce empty ranges; skip those.
		if (word_begin != word_end)
		{
			ans.insert(string(word_begin, word_end));
		}

		if (word_end == str.end())
		{
			break;
		}

		// Continue just past the delimiter that ended this word.
		word_begin = word_end + 1;
	}
}

// Demo driver: lower-cases a sample sentence, splits it into distinct words
// and prints them one per line.
int main()
{
	string text = "  This is an red apple or a green     apple?Maybe it is red, maybe it is green.  ";

	// Lower-case in place so that "Maybe" and "maybe" become the same word.
	transform(text.begin(), text.end(), text.begin(), ::tolower);

	set<string> words;
	splitString(text, words, "?,. ");

	// A std::set iterates in ascending order, so the output is sorted.
	set<string>::iterator cur = words.begin();
	while (cur != words.end())
	{
		cout << *cur << endl;
		++cur;
	}
}

手写搜索版 + tolower + unique + sort

#include <iostream>
#include <string>
#include <set>
#include <vector>
#include <algorithm>
using namespace std;

/**
 * Splits `str` into words and stores every distinct, non-empty word in `ans`,
 * using a hand-written character scan.
 *
 * A word is a maximal run of characters that does not contain any character
 * of `separator`. The end of `str` is handled like a delimiter, so the last
 * word is emitted even when no delimiter follows it, and a string without
 * any delimiter yields itself as one word.
 *
 * @param str        text to split; it is not modified
 * @param ans        receives the words; entries already present are kept
 * @param separator  set of single-character delimiters
 */
void splitString(const string& str, set<string>& ans, const string& separator)
{
	// Index of the first character of the word currently being scanned.
	string::size_type word_start = 0;

	// pos deliberately runs one past the last index: that extra step closes
	// the final word without duplicating the insertion code after the loop.
	for (string::size_type pos = 0; pos <= str.length(); pos++)
	{
		bool at_boundary = (pos == str.length());

		for (string::size_type sep_index = 0; !at_boundary && sep_index < separator.length(); sep_index++)
		{
			if (str[pos] == separator[sep_index])
			{
				at_boundary = true;
			}
		}

		if (!at_boundary)
		{
			continue;
		}

		// Adjacent delimiters give an empty range; only real words are stored.
		if (pos > word_start)
		{
			ans.insert(str.substr(word_start, pos - word_start));
		}
		word_start = pos + 1;
	}
}

// Demo driver: lower-cases a sample sentence character by character, splits
// it into distinct words and prints them one per line.
int main()
{
	string sentence = "   This is an red apple or a green     apple?Maybe it is red, maybe it is green.  ";

	// Manual equivalent of transform(..., tolower): rewrite each char in place.
	for (string::iterator ch = sentence.begin(); ch != sentence.end(); ++ch)
	{
		*ch = tolower(*ch);
	}

	set<string> ans;
	splitString(sentence, ans, "?,. ");

	// A std::set iterates in ascending order, so the output is sorted.
	set<string>::iterator it = ans.begin();
	while (it != ans.end())
	{
		cout << *it << endl;
		++it;
	}
}
set 相当于 vector 加 sort 加 unique(unique 只去除相邻的重复元素,所以必须先排序再去重)
transform 配合 ::tolower 相当于对每个 char 逐个调用 tolower

最古老 全手写 + strtok/strcpy + 插入排序 版

#include <string>
#include <cstring>
#include <vector>
#include <iostream>
using namespace std;

/**
 * Converts the ASCII upper-case letters 'A'..'Z' of a C string to lower case
 * in place. All other characters are left untouched.
 *
 * @param k  NUL-terminated, writable string; a null pointer is ignored
 */
void to_lower(char * k)
{
	if(!k)
	{
		return;
	}

	// Walk to the terminator directly instead of calling strlen() on every
	// iteration, which made the original loop quadratic in the string length.
	for(char* cur=k;*cur!='\0';++cur)
	{
		if(*cur>='A'&&*cur<='Z')
		{
			*cur=*cur-'A'+'a';
		}
	}
}

// Returns true when `s` is equal to at least one element of `k`,
// false otherwise (including when `k` is empty). Linear search.
bool judge_in(const vector<string> &k,string s)
{
	vector<string>::const_iterator cur=k.begin();

	// Advance until a match is found or the vector is exhausted.
	while(cur!=k.end()&&!(*cur==s))
	{
		++cur;
	}
	return cur!=k.end();
}

// Appends `s` to `v` unless judge_in() reports that `v` already holds an
// equal string, so `v` never gains a duplicate through this function.
void add(vector<string> &v,string s)
{
	if(judge_in(v,s))
	{
		return;
	}
	v.push_back(s);
}

/**
 * Sorts `v` in ascending order (std::string operator>) with a hand-written
 * insertion sort. Equal elements keep their relative order.
 *
 * @param v  vector to sort in place; empty and single-element vectors are
 *           left unchanged
 */
void Sort(vector<string> &v)
{
	for(vector<string>::size_type next=1;next<v.size();++next)
	{
		string pending=v[next];

		// Use a separate index for the shifting step so the outer loop
		// counter is never moved backwards and the sorted prefix is not
		// scanned again on the following iterations.
		vector<string>::size_type hole=next;
		while(hole>0&&v[hole-1]>pending)
		{
			v[hole]=v[hole-1];
			--hole;
		}
		v[hole]=pending;
	}
}
// Demo driver: tokenizes a sample sentence with strtok, lower-cases each
// token, collects the distinct words, sorts them and prints one per line.
int main()
{
	char sentence[]="   This is an red apple or a green     apple?Maybe it is red, maybe it is green.  ";
	// One delimiter set for every strtok call, so the calls cannot drift apart.
	const char* delimiters=",. ?";
	vector<string> all_word;

	for(char* p=strtok(sentence,delimiters);p;p=strtok(NULL,delimiters))
	{
		// strtok returns a pointer into the writable `sentence` array, so the
		// token is lower-cased in place. This avoids copying it into a
		// fixed-size scratch buffer that a long token could overflow.
		to_lower(p);
		add(all_word,p);
	}

	Sort(all_word);

	for(vector<string>::size_type i=0;i<all_word.size();i++)
	{
		cout<<all_word[i]<<endl;
	}
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值