Tokenize a string

Using C

This example uses the strtok() function to separate the tokens. This function is destructive (it replaces token separators with '\0'), so we have to make a copy of the string (using strdup()) before tokenizing. strdup() is not part of ANSI C (it is specified by POSIX and was only added to the C standard in C23), but it is available on most platforms. It can easily be implemented with a combination of strlen(), malloc(), and strcpy().

#include<string.h>
#include<stdio.h>
#include<stdlib.h>

/*
 * Splits a comma-separated string into tokens with strtok() and prints
 * each token followed by '.', then a newline.
 *
 * Returns 0 on success, EXIT_FAILURE if the working copy of the string
 * cannot be allocated.
 */
int main(void)
{
	enum { MAX_TOKENS = 5 };
	char *a[MAX_TOKENS];
	const char *s="Hello,How,Are,You,Today";
	int n=0, nn;
	char *tok;

	/* strtok() overwrites separators with '\0', so tokenize a private copy. */
	char *ds=strdup(s);
	if(ds==NULL)
	{
		fputs("strdup failed\n", stderr);
		return EXIT_FAILURE;
	}

	/*
	 * Store only real tokens: the NULL that strtok() returns at the end of
	 * the input is never placed in a[], so the print loop below can never
	 * hand a null pointer to printf("%s"). Tokens beyond MAX_TOKENS are
	 * ignored.
	 */
	for(tok=strtok(ds, ","); tok!=NULL && n<MAX_TOKENS; tok=strtok(NULL, ","))
	{
		a[n++]=tok;
	}

	/* n is now the number of tokens stored, so the bound is exclusive. */
	for(nn=0; nn<n; ++nn)
	{
		printf("%s.", a[nn]);
	}
	putchar('\n');

	/* The pointers in a[] point into ds, so release it only after printing. */
	free(ds);

	return 0;
}

Another way to accomplish the task without the built-in string functions is to temporarily overwrite each separator character with '\0' while its token is being processed, and to restore it afterwards. This method does not need any additional memory, but requires the input string to be writable.

#include<stdio.h>

/* Callback type: receives one NUL-terminated token at a time. */
typedef void (*callbackfunc)(const char *);

/* Sample callback: prints the token followed by a '.'. */
void doprint(const char *s)
{
	printf("%s.", s);
}

/*
 * Splits s in place on the single character delim and calls cb once per
 * token, in order.
 *
 * s      writable, NUL-terminated string. Each separator is overwritten
 *        with '\0' only for the duration of the callback and restored
 *        afterwards, so s is unchanged when the function returns. The token
 *        pointer passed to cb is only a valid string during that call.
 * delim  separator character. If it is '\0' the whole (non-empty) string is
 *        reported as a single token.
 * cb     function invoked for every token.
 *
 * Adjacent or leading separators yield empty tokens; a trailing separator
 * does not yield a final empty token; an empty string yields no tokens.
 * A NULL s or NULL cb is a no-op.
 */
void tokenize(char *s, char delim, callbackfunc cb)
{
	char *start;

	if(s == NULL || cb == NULL)
	{
		return;
	}

	start = s;
	while(*start)
	{
		char *end = start;
		char saved;

		/* Advance to the next separator or to the end of the string. */
		while(*end && *end != delim)
		{
			end++;
		}

		/* Terminate the token, hand it out, then put the character back. */
		saved = *end;
		*end = '\0';
		cb(start);
		*end = saved;

		/* The token ended at the string's own terminator: nothing follows. */
		if(saved == '\0')
		{
			break;
		}
		start = end + 1;
	}
}

/*
 * Tokenizes a writable copy of the sample string on ',' and prints every
 * token through doprint, then ends the output line.
 */
int main(void)
{
	/* An array, not a pointer to a literal: tokenize() writes to its input. */
	char array[] = "Hello,How,Are,You,Today";
	tokenize(array, ',', doprint);
	/* Terminate the line, as the other examples do. */
	putchar('\n');
	return 0;
}

Using C++

std::getline() is typically used to tokenize strings on a single-character delimiter.

#include <string>
#include <sstream>
#include <vector>
#include <iterator>
#include <iostream>
#include <algorithm>
// Splits the sample string on ',' with std::getline and prints each piece
// followed by '.', then a newline.
int main()
{
	const std::string text = "Hello,How,Are,You,Today";
	std::istringstream input(text);

	// Collect every comma-delimited field; getline stops at end of input.
	std::vector<std::string> fields;
	std::string field;
	while(std::getline(input, field, ','))
	{
		fields.push_back(field);
	}

	// Emit the fields, each terminated by a '.'.
	for(std::vector<std::string>::size_type i = 0; i < fields.size(); ++i)
	{
		std::cout << fields[i] << ".";
	}
	std::cout << '\n';
}

C++ allows the user to redefine what is considered whitespace. If the delimiter is whitespace, tokenization becomes effortless.

#include <string>
#include <locale>
#include <sstream>
#include <vector>
#include <iterator>
#include <iostream>
#include <algorithm>
// Character-classification facet that is the classic "C" table with one
// change: ',' is additionally classified as whitespace, so formatted stream
// extraction treats commas as token separators.
struct comma_ws : std::ctype<char>
{
	// Returns a pointer to the modified classification table. The table is a
	// function-local static, so it is built once and outlives every facet
	// that points at it.
	static const mask* make_table()
	{
		static std::vector<mask> table(classic_table(), classic_table() + table_size);
		mask& comma_entry = table[','];
		comma_entry |= space;  // comma now also counts as whitespace
		return &table.front();
	}

	// refs is forwarded to the facet base class. `false` tells the base class
	// not to delete the table, which is owned by make_table().
	comma_ws(std::size_t refs = 0)
		: ctype<char>(make_table(), false, refs)
	{
	}
};
// Tokenizes the sample string by imbuing the stream with a locale whose
// ctype facet treats ',' as whitespace, then prints each word followed by
// '.', and finally a newline.
int main()
{
	const std::string text = "Hello,How,Are,You,Today";
	std::istringstream input(text);

	// Same locale as before, except that its ctype<char> facet is comma_ws.
	const std::locale comma_locale(input.getloc(), new comma_ws);
	input.imbue(comma_locale);

	// operator>> skips whatever the imbued facet classifies as whitespace.
	std::vector<std::string> words;
	for(std::string word; input >> word; )
	{
		words.push_back(word);
	}

	for(std::vector<std::string>::const_iterator it = words.begin(); it != words.end(); ++it)
	{
		std::cout << *it << ".";
	}
	std::cout << '\n';
}

Using C++:Boost

The boost library has multiple options for easy tokenization.
#include <string>
#include <vector>
#include <iterator>
#include <algorithm>
#include <iostream>
#include <boost/tokenizer.hpp>
int main()
{
	std::string s = "Hello,How,Are,You,Today";
	boost::tokenizer<> tok(s);
	std::vector<std::string> v(tok.begin(), tok.end());
	copy(v.begin(), v.end(), std::ostream_iterator<std::string>(std::cout, "."));
	std::cout << '\n';
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值