突然看到首页出现两篇题目极为相似的文章,一看,居然还是不同作者,看了幽梦新影兄的执行结果,突然想起自己好像也有用C语言写过类似代码。(纯属凑热闹了)
《<<字符串高级截取和统计>>一文的看法与正则表达式的实现》
《字符串高级截取和统计》(补充一下,竟然又发现一篇~)
不过不知道为什么遇到这样的功能大家就一定要想到“正则实现”呢?曾经参加微软的一个第三方培训的时候得知正则表达式的效率相当的低下,大部分的性能瓶颈均出于此,不过那个培训是高性能培训。
但值得肯定的是这个正则表达式绝对是一个优秀的实践。很多纷繁复杂的查找匹配规则通过正则表达式立刻就能迅速求解了。
下面这个C语言程序是我以前写着玩的程序,刚才突然想到了,就贴出来一同完善这个解决方案。不过不难看出用传统的字符构成字符串的思路,要写完这个类似的功能,工作量还是相当巨大的。但是用正则表达式则能够让代码简洁优美。至于效率,我个人认为其实完全可以忽略不计。
不过我这个代码的实现好像还附加了其他一些功能,不过没关系,只增不减。
1
/*
2 * textsearch.h
3 *
4 * Created on:
5 * Author: Volnet
6 * Website: http://volnet.cnblogs.com
7 *
8 */
9
#ifndef TEXTSEARCH_H_
#define TEXTSEARCH_H_

#include <stdarg.h>
#include <stddef.h> /* size_t */

#ifndef INDEX_T_DEFINDED
/* Zero-based position of a character inside a text. */
typedef int index_t;
#define INDEX_T_DEFINDED
#endif

#ifndef COUNT_T_DEFINDED
/* Number of matches / characters. */
typedef unsigned int count_t;
#define COUNT_T_DEFINDED
#endif

/*
 * Callback invoked by search_s_foreach() once for every match.
 *
 * text          : pointer to the position of the match inside the text
 *                 being searched.
 * found         : the word that was searched for.
 * lengthOfFound : length of that word.
 * offset        : in/out. On entry it holds the length of the match; the
 *                 callback may overwrite it with the number of chars the
 *                 search has to skip before it continues.
 *                 e.g. while replacing "de" by "123" in "abcdefgh"
 *                 (result "abc123fgh") the callback sets *offset to 3,
 *                 which leaves the search pointer on the next char 'f'.
 * paras         : the variadic arguments of search_s_foreach(), positioned
 *                 behind the gfunc_paras_count arguments owned by gfunc.
 */
typedef void (*search_foreach)(const char *text,
        const char *found, index_t lengthOfFound,
        index_t *offset,
        va_list *paras);

/*
 * Callback invoked by search_s_foreach() once, before the search starts.
 *
 * paras : the variadic arguments of search_s_foreach(), positioned behind
 *         the first one.
 * ret   : the first variadic argument; after the callback returns, the
 *         search runs on this buffer instead of on `text`.
 */
typedef void (*search_global)(va_list *paras, void *ret);

/* Returned by indexof() when nothing was found. */
#define UNFINDED ((index_t)-1)
/* No trailing semicolon, so the macro can be used inside expressions. */
#define MAX_INDICES 1000

/* declaration of functions */

/*
 * indexof  : returns the index of the first char `c` in `text`,
 *            or UNFINDED when there is none.
 * search_c : counts every char `c` in `text` and stores the index of each
 *            one in out_indices, which must have room for all of them.
 * search_s : counts every occurrence of the word `tofind` in `text` and
 *            stores the start index of each one in out_indices;
 *            out_indices may be NULL when the indices are not needed.
 */
index_t indexof(const char *text, char c);
count_t search_c(const char *text, const char c, index_t *out_indices);
count_t search_s(const char *text, const char *tofind, index_t *out_indices);

/*
 * Generic search: like search_s(), with an optional pre-pass (gfunc) and an
 * optional per-match callback (func). Returns the number of matches.
 */
count_t search_s_foreach(
        /* the full text to search. */
        const char *text,
        /* the word to find in the text. */
        const char *tofind,
        /* (out) receives the start index of every match; may be NULL. */
        index_t *out_indices,
        /* executed once before the search; may be NULL. */
        search_global gfunc,
        /* the number of variadic arguments that belong to gfunc. */
        const size_t gfunc_paras_count,
        /* executed for every match; may be NULL. */
        search_foreach func,
        /* if gfunc != NULL, the first variadic argument is the buffer
         * handed to gfunc as `ret`; the arguments behind the first
         * gfunc_paras_count ones are read by func.
         */
        ...);

/*
 * function :replaces the word 's1' by 's2' in text.
 * ret      :(out) an array with enough space to hold the result.
 *           text != ret
 */
void replace(const char *text, const char *s1, const char *s2, char ret[]);

/*
 * function :removes the word 's' from text.
 * ret      :(out) an array with enough space to hold the result.
 *           text != ret
 */
void to_remove(char *text, const char *s, char ret[]);

#endif /* TEXTSEARCH_H_ */
96
2 * textsearch.h
3 *
4 * Created on:
5 * Author: Volnet
6 * Website: http://volnet.cnblogs.com
7 *
8 */
9
#ifndef TEXTSEARCH_H_
#define TEXTSEARCH_H_

#include <stdarg.h>
#include <stddef.h> /* size_t */

#ifndef INDEX_T_DEFINDED
/* Zero-based position of a character inside a text. */
typedef int index_t;
#define INDEX_T_DEFINDED
#endif

#ifndef COUNT_T_DEFINDED
/* Number of matches / characters. */
typedef unsigned int count_t;
#define COUNT_T_DEFINDED
#endif

/*
 * Callback invoked by search_s_foreach() once for every match.
 *
 * text          : pointer to the position of the match inside the text
 *                 being searched.
 * found         : the word that was searched for.
 * lengthOfFound : length of that word.
 * offset        : in/out. On entry it holds the length of the match; the
 *                 callback may overwrite it with the number of chars the
 *                 search has to skip before it continues.
 *                 e.g. while replacing "de" by "123" in "abcdefgh"
 *                 (result "abc123fgh") the callback sets *offset to 3,
 *                 which leaves the search pointer on the next char 'f'.
 * paras         : the variadic arguments of search_s_foreach(), positioned
 *                 behind the gfunc_paras_count arguments owned by gfunc.
 */
typedef void (*search_foreach)(const char *text,
        const char *found, index_t lengthOfFound,
        index_t *offset,
        va_list *paras);

/*
 * Callback invoked by search_s_foreach() once, before the search starts.
 *
 * paras : the variadic arguments of search_s_foreach(), positioned behind
 *         the first one.
 * ret   : the first variadic argument; after the callback returns, the
 *         search runs on this buffer instead of on `text`.
 */
typedef void (*search_global)(va_list *paras, void *ret);

/* Returned by indexof() when nothing was found. */
#define UNFINDED ((index_t)-1)
/* No trailing semicolon, so the macro can be used inside expressions. */
#define MAX_INDICES 1000

/* declaration of functions */

/*
 * indexof  : returns the index of the first char `c` in `text`,
 *            or UNFINDED when there is none.
 * search_c : counts every char `c` in `text` and stores the index of each
 *            one in out_indices, which must have room for all of them.
 * search_s : counts every occurrence of the word `tofind` in `text` and
 *            stores the start index of each one in out_indices;
 *            out_indices may be NULL when the indices are not needed.
 */
index_t indexof(const char *text, char c);
count_t search_c(const char *text, const char c, index_t *out_indices);
count_t search_s(const char *text, const char *tofind, index_t *out_indices);

/*
 * Generic search: like search_s(), with an optional pre-pass (gfunc) and an
 * optional per-match callback (func). Returns the number of matches.
 */
count_t search_s_foreach(
        /* the full text to search. */
        const char *text,
        /* the word to find in the text. */
        const char *tofind,
        /* (out) receives the start index of every match; may be NULL. */
        index_t *out_indices,
        /* executed once before the search; may be NULL. */
        search_global gfunc,
        /* the number of variadic arguments that belong to gfunc. */
        const size_t gfunc_paras_count,
        /* executed for every match; may be NULL. */
        search_foreach func,
        /* if gfunc != NULL, the first variadic argument is the buffer
         * handed to gfunc as `ret`; the arguments behind the first
         * gfunc_paras_count ones are read by func.
         */
        ...);

/*
 * function :replaces the word 's1' by 's2' in text.
 * ret      :(out) an array with enough space to hold the result.
 *           text != ret
 */
void replace(const char *text, const char *s1, const char *s2, char ret[]);

/*
 * function :removes the word 's' from text.
 * ret      :(out) an array with enough space to hold the result.
 *           text != ret
 */
void to_remove(char *text, const char *s, char ret[]);

#endif /* TEXTSEARCH_H_ */
96
textSearch.c
1 /*
2 * textsearch.c
3 *
4 * Created on:
5 * Author: Volnet
6 * Website: http://volnet.cnblogs.com
7 *
8 */
9
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <stdarg.h>
13 #include "../header/textsearch.h"
14
15 /*
16 * @return:
17 * the first index of which difference char of another string.
18 * */
19 static index_t compareTo_n(const char *s1, const char *s2, const count_t n);
20 static count_t length(const char *s);
21 static void replace_call(const char *text, const char *found, index_t lengthOfFound,
22 index_t *offset, va_list *paras);
23 static void strcpy_gcall(va_list *paras, void *ret);
24
25 index_t indexof(const char *text, char c) {
26 index_t index;
27 index = 0;
28
29 if (!sizeof text || !c)
30 return UNFINDED;
31
32 while (*text) {
33 if (*text == c)
34 return index;
35 index++;
36 text++;
37 }
38 return UNFINDED;
39 }
40
41 count_t search_c(const char *text, const char c, index_t *out_indices) {
42 count_t count;
43 index_t curr;
44 count = 0;
45 curr = 0;
46 while (*text) {
47 if (*text == c) {
48 count++;
49 *out_indices++ = curr;
50 }
51 curr++;
52 text++;
53 }
54 return count;
55 }
56
57 count_t search_s(const char *text, const char *tofind, index_t *out_indices) {
58 return search_s_foreach(text, tofind, out_indices, NULL, 0, NULL);
59 }
60
61 count_t search_s_foreach(const char *text, const char *tofind, index_t *out_indices,
62 search_global gfunc, const size_t gfunc_paras_count,
63 search_foreach func, ) {
64 count_t count;
65 index_t curr;
66 count_t lengthOfToFind;
67 count_t offset;
68 size_t argCount;
69 va_list ap;
70 void *ret;
71 count_t lengthOfText;
72 char *ttext;
73
74 count = 0;
75 curr = 0;
76 ttext = (char *)text;
77 lengthOfToFind = length(tofind);
78
79 if (gfunc != NULL) {
80 va_start(ap, func);
81 ret = va_arg(ap, void *);
82 gfunc(&ap, ret);
83 va_end(ap);
84 ttext = ret;
85 }
86
87 lengthOfText = length(ttext);
88 while (*ttext) {
89 if (*ttext == *tofind) {
90 if (lengthOfToFind == (offset = compareTo_n(ttext, tofind,
91 lengthOfToFind))) {
92 /*if found*/
93 if (out_indices != NULL)
94 *out_indices++ = curr;
95 count++;
96 if (func != NULL) {
97 va_start(ap, func);
98 argCount = gfunc_paras_count;
99 while (argCount--)
100 va_arg(ap, void *);
101 func(ttext, tofind, lengthOfToFind, &offset, &ap);
102 va_end(ap);
103 }
104 }
105 curr += offset;
106 ttext += offset;
107 } else {
108 curr++;
109 ttext++;
110 }
111 }
112
113 return count;
114 }
115
116 void replace(const char *text, const char *s1, const char *s2, char ret[]) {
117 if (text == ret) {
118 perror("warning: the pointer can't equals to the pointer to ret.\n");
119 return;
120 }
121 search_s_foreach(text, s1, NULL, strcpy_gcall, 2, replace_call, ret, text,
122 s2);
123 }
124
125 static void replace_call(const char *text, const char *found, index_t lengthOfFound,
126 index_t *offset, va_list *paras) {
127 char *s2 = va_arg(*paras, char *);
128 index_t off;
129 index_t lenS2;
130 count_t leaveLenText;
131 int inc;
132 char *ttext;
133
134 lenS2 = length(s2);
135 *offset = lenS2;
136 inc = 0;
137 ttext = (char *)text;
138
139 leaveLenText = length(text);
140
141 /*move data*/
142 off = lengthOfFound - lenS2;
143 if (off > 0) {
144 /*move to front*/
145 while (leaveLenText--) {
146 *(ttext + lenS2 + inc) = *(ttext + lengthOfFound + inc);
147 inc++;
148 }
149 } else if (off < 0) {
150 /*move to behind*/
151 while (leaveLenText) {
152 *(ttext + leaveLenText - off) = *(ttext + leaveLenText);
153 leaveLenText--;
154 }
155 }
156
157 /*do copy*/
158 while (*s2) {
159 *ttext++ = *s2++;
160 }
161 }
162
/*
 * Writes into `ret` a copy of `text` without any occurrence of the word `s`.
 * Implemented as a replacement by the empty string.
 */
void to_remove(char *text, const char *s, char ret[]) {
    static const char nothing[] = "";

    replace(text, s, nothing, ret);
}
166
/*
 * Pre-pass callback used by replace(): copies the string found as the next
 * argument of `paras` into the buffer `ret`, terminator included.
 */
static void strcpy_gcall(va_list *paras, void *ret) {
    const char *src = va_arg(*paras, char *);
    char *dst = ret;

    for (;;) {
        const char ch = *src++;

        *dst++ = ch;
        if (ch == '\0')
            break;
    }
}
174
175 static index_t compareTo_n(const char *s1, const char *s2, const count_t n) {
176 count_t tn;
177 tn = n;
178 while (tn--)
179 if (*s1 || *s2)
180 if (*s1++ != *s2++) {
181 return n - tn - 1;
182 }
183 return n - tn - 1;
184 }
185
186 static count_t length(const char *s) {
187 count_t len;
188 len = 0;
189 while (*s++)
190 len++;
191 return len;
192 }
193
1 /*
2 * textsearch.c
3 *
4 * Created on:
5 * Author: Volnet
6 * Website: http://volnet.cnblogs.com
7 *
8 */
9
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <stdarg.h>
13 #include "../header/textsearch.h"
14
15 /*
16 * @return:
17 * the first index of which difference char of another string.
18 * */
19 static index_t compareTo_n(const char *s1, const char *s2, const count_t n);
20 static count_t length(const char *s);
21 static void replace_call(const char *text, const char *found, index_t lengthOfFound,
22 index_t *offset, va_list *paras);
23 static void strcpy_gcall(va_list *paras, void *ret);
24
25 index_t indexof(const char *text, char c) {
26 index_t index;
27 index = 0;
28
29 if (!sizeof text || !c)
30 return UNFINDED;
31
32 while (*text) {
33 if (*text == c)
34 return index;
35 index++;
36 text++;
37 }
38 return UNFINDED;
39 }
40
41 count_t search_c(const char *text, const char c, index_t *out_indices) {
42 count_t count;
43 index_t curr;
44 count = 0;
45 curr = 0;
46 while (*text) {
47 if (*text == c) {
48 count++;
49 *out_indices++ = curr;
50 }
51 curr++;
52 text++;
53 }
54 return count;
55 }
56
57 count_t search_s(const char *text, const char *tofind, index_t *out_indices) {
58 return search_s_foreach(text, tofind, out_indices, NULL, 0, NULL);
59 }
60
61 count_t search_s_foreach(const char *text, const char *tofind, index_t *out_indices,
62 search_global gfunc, const size_t gfunc_paras_count,
63 search_foreach func, ) {
64 count_t count;
65 index_t curr;
66 count_t lengthOfToFind;
67 count_t offset;
68 size_t argCount;
69 va_list ap;
70 void *ret;
71 count_t lengthOfText;
72 char *ttext;
73
74 count = 0;
75 curr = 0;
76 ttext = (char *)text;
77 lengthOfToFind = length(tofind);
78
79 if (gfunc != NULL) {
80 va_start(ap, func);
81 ret = va_arg(ap, void *);
82 gfunc(&ap, ret);
83 va_end(ap);
84 ttext = ret;
85 }
86
87 lengthOfText = length(ttext);
88 while (*ttext) {
89 if (*ttext == *tofind) {
90 if (lengthOfToFind == (offset = compareTo_n(ttext, tofind,
91 lengthOfToFind))) {
92 /*if found*/
93 if (out_indices != NULL)
94 *out_indices++ = curr;
95 count++;
96 if (func != NULL) {
97 va_start(ap, func);
98 argCount = gfunc_paras_count;
99 while (argCount--)
100 va_arg(ap, void *);
101 func(ttext, tofind, lengthOfToFind, &offset, &ap);
102 va_end(ap);
103 }
104 }
105 curr += offset;
106 ttext += offset;
107 } else {
108 curr++;
109 ttext++;
110 }
111 }
112
113 return count;
114 }
115
116 void replace(const char *text, const char *s1, const char *s2, char ret[]) {
117 if (text == ret) {
118 perror("warning: the pointer can't equals to the pointer to ret.\n");
119 return;
120 }
121 search_s_foreach(text, s1, NULL, strcpy_gcall, 2, replace_call, ret, text,
122 s2);
123 }
124
125 static void replace_call(const char *text, const char *found, index_t lengthOfFound,
126 index_t *offset, va_list *paras) {
127 char *s2 = va_arg(*paras, char *);
128 index_t off;
129 index_t lenS2;
130 count_t leaveLenText;
131 int inc;
132 char *ttext;
133
134 lenS2 = length(s2);
135 *offset = lenS2;
136 inc = 0;
137 ttext = (char *)text;
138
139 leaveLenText = length(text);
140
141 /*move data*/
142 off = lengthOfFound - lenS2;
143 if (off > 0) {
144 /*move to front*/
145 while (leaveLenText--) {
146 *(ttext + lenS2 + inc) = *(ttext + lengthOfFound + inc);
147 inc++;
148 }
149 } else if (off < 0) {
150 /*move to behind*/
151 while (leaveLenText) {
152 *(ttext + leaveLenText - off) = *(ttext + leaveLenText);
153 leaveLenText--;
154 }
155 }
156
157 /*do copy*/
158 while (*s2) {
159 *ttext++ = *s2++;
160 }
161 }
162
/*
 * Writes into `ret` a copy of `text` without any occurrence of the word `s`.
 * Implemented as a replacement by the empty string.
 */
void to_remove(char *text, const char *s, char ret[]) {
    static const char nothing[] = "";

    replace(text, s, nothing, ret);
}
166
/*
 * Pre-pass callback used by replace(): copies the string found as the next
 * argument of `paras` into the buffer `ret`, terminator included.
 */
static void strcpy_gcall(va_list *paras, void *ret) {
    const char *src = va_arg(*paras, char *);
    char *dst = ret;

    for (;;) {
        const char ch = *src++;

        *dst++ = ch;
        if (ch == '\0')
            break;
    }
}
174
175 static index_t compareTo_n(const char *s1, const char *s2, const count_t n) {
176 count_t tn;
177 tn = n;
178 while (tn--)
179 if (*s1 || *s2)
180 if (*s1++ != *s2++) {
181 return n - tn - 1;
182 }
183 return n - tn - 1;
184 }
185
186 static count_t length(const char *s) {
187 count_t len;
188 len = 0;
189 while (*s++)
190 len++;
191 return len;
192 }
193
下面是调用代码:
1
/**/
/*
2 * test.c
3 *
4 * Created on:
5 * Author: Volnet
6 * Website: http://volnet.cnblogs.com
7 *
8 */
9
10 #include < stdio.h >
11 #include < stdlib.h >
12 #include " ../header/textsearch.h "
13
14 int main( void ) {
15 char *s = "this is a word, who contains \"wh\"!";
16 char c = 'w';
17
18 index_t indices_1[100];
19 index_t *indices1 = indices_1;
20 count_t count1;
21
22 char *tofind = "wh";
23 char *toremove = "onson";
24 char *fromreplace = "wh";
25 char *toreplace = "jonson";
26 index_t indices_2[100];
27 index_t *indices2;
28 count_t count2;
29 char ts[1000];
30 char rs[1000];
31
32 /**//*e.g:*/
33 /**//*index_t indexof(char *text, char c);*/
34 printf("the first index of %c is %d.\n",c,indexof(s,c));
35
36 /**//*count_t search(char *text, char c, index_t *out_indices)*/
37 count1 = search_c(s,c,indices1);
38 printf("the '%c' in the text is in : ",c);
39 while(count1--){
40 printf("%d ",*indices1++);
41 }
42 printf(".\n");
43
44 /**//*count_t search_s(char *text, const char *tofind, index_t *out_indices)*/
45 indices2 = indices_2;
46 count2 = search_s(s,tofind,indices2);
47 printf("the \"%s\" in the text is in : ",tofind);
48 while(count2--){
49 printf("%d ",*indices2++);
50 }
51 printf(".\n");
52
53 printf("the word is:\n\t%s\n",s);
54 replace(s, fromreplace, toreplace,ts);
55 printf("the word has be changed as (\"%s\"->\"%s\") :\n\t%s\n",fromreplace, toreplace, ts);
56
57 to_remove(ts,toremove,rs);
58 printf("the word remove (remove \"%s\") as : \n\t%s\n",toremove,rs);
59
60 return EXIT_SUCCESS;
61}
62
2 * test.c
3 *
4 * Created on:
5 * Author: Volnet
6 * Website: http://volnet.cnblogs.com
7 *
8 */
9
10 #include < stdio.h >
11 #include < stdlib.h >
12 #include " ../header/textsearch.h "
13
14 int main( void ) {
15 char *s = "this is a word, who contains \"wh\"!";
16 char c = 'w';
17
18 index_t indices_1[100];
19 index_t *indices1 = indices_1;
20 count_t count1;
21
22 char *tofind = "wh";
23 char *toremove = "onson";
24 char *fromreplace = "wh";
25 char *toreplace = "jonson";
26 index_t indices_2[100];
27 index_t *indices2;
28 count_t count2;
29 char ts[1000];
30 char rs[1000];
31
32 /**//*e.g:*/
33 /**//*index_t indexof(char *text, char c);*/
34 printf("the first index of %c is %d.\n",c,indexof(s,c));
35
36 /**//*count_t search(char *text, char c, index_t *out_indices)*/
37 count1 = search_c(s,c,indices1);
38 printf("the '%c' in the text is in : ",c);
39 while(count1--){
40 printf("%d ",*indices1++);
41 }
42 printf(".\n");
43
44 /**//*count_t search_s(char *text, const char *tofind, index_t *out_indices)*/
45 indices2 = indices_2;
46 count2 = search_s(s,tofind,indices2);
47 printf("the \"%s\" in the text is in : ",tofind);
48 while(count2--){
49 printf("%d ",*indices2++);
50 }
51 printf(".\n");
52
53 printf("the word is:\n\t%s\n",s);
54 replace(s, fromreplace, toreplace,ts);
55 printf("the word has be changed as (\"%s\"->\"%s\") :\n\t%s\n",fromreplace, toreplace, ts);
56
57 to_remove(ts,toremove,rs);
58 printf("the word remove (remove \"%s\") as : \n\t%s\n",toremove,rs);
59
60 return EXIT_SUCCESS;
61}
62
一条正则表达式可以搞定的东西,我们需要写这么多的代码才能够实现,相形见绌就此体现。
下面是输出结果:
the first index of w is 10.
the 'w' in the text is in : 10 16 30 .
the "wh" in the text is in : 16 30 .
the word is:
this is a word, who contains "wh"!
the word has be changed as ("wh"->"jonson") :
this is a word, jonsono contains "jonson"!
the word remove (remove "onson") as :
this is a word, jo contains "j"!
the 'w' in the text is in : 10 16 30 .
the "wh" in the text is in : 16 30 .
the word is:
this is a word, who contains "wh"!
the word has be changed as ("wh"->"jonson") :
this is a word, jonsono contains "jonson"!
the word remove (remove "onson") as :
this is a word, jo contains "j"!