原文链接:http://lixing123.com/archives/310
学习《C程序设计语言》到第1章最后,有一道题目:
编写一个删除C语言程序中所有的注释语句。要正确处理带引号的字符串与字符常量。在C语言中,注释不允许嵌套。
Exercise 1-23. Write a program to remove all comments from a C program. Don't forget to handle quoted strings and character constants properly. C comments don't nest.
刚开始,我用一种brute-force的方式,对每个字符进行遍历,然后进行判断,有没有进入注释。
这样做有一个非常麻烦的问题:只有连续检测到“//”或者"/*"时,才确定进入了注释状态;如果我们只检测到1个"/",后面就是其它字符串的话,还得将这个单独的"/"打印出来。
而且有许多的分支状态,如果用if/else的话,会难以理解,并容易出错。
在网上搜了一下,发现有一种解法非常好:状态机。在各种状态之间跳转,逻辑清晰,不易出错,出错了也容易调试。
下面把代码贴出来:
#include <stdio.h>
int state;
int c1,c2;
void change_state(int c);
int main(int argc, const char * argv[]) {
int c;
state = 0;
c1 = 0;
c2 = 0;
while ((c=getchar())!=EOF) {
c1 = c2;
c2 = c;
change_state(c);
}
if (/* DISABLES CODE */ (0)==1) {
printf("just test://abcd");
printf("just test:/*hello*/");
}
}
/*状态机函数*/
void change_state(int c){
if (state==0) {//普通状态
if (c=='/') {
state = 1;
}else if (c=='"'){
state = 5;
putchar(c);
}else if (c=='\''){
state = 6;
putchar(c);
}
else{
state = 0;
putchar(c);
}
}else if (state==1) {//检测到1个'/'
if (c=='/') {
state = 2;
}else if (c=='*'){
state = 3;
}else{
state = 0;
putchar(c1);
putchar(c);
}
}else if (state==2) {// "//"注释状态
if (c=='\n') {
state = 0;
putchar(c);
}else{
state = 2;
}
}else if (state==3) {// "/*"注释状态
if (c=='*') {
state = 4;
}else{
state = 3;
}
}else if (state==4) {
if (c=='/') {
state = 0;
}else{
state = 3;
}
}else if (state==5){//在"字符串里
if (c=='"') {
state = 0;
putchar(c);
}else if(c=='\\'){
state = 7;
putchar(c);
}else{
state = 5;
putchar(c);
}
}else if (state==6){//在'字符里
if (c=='\'') {
state = 0;
putchar(c);
}else if(c=='\\'){
state = 8;
putchar(c);
}else{
state = 6;
putchar(c);
}
}else if (state==7){//在"字符串里的"\"
state = 5;
putchar(c);
}else if (state==8){//在'字符串里的"\"
state = 6;
putchar(c);
}
}
以本段代码作为输入,结果如下:
#include <stdio.h>
int state;
int c1,c2;
void change_state(int c);
int main(int argc, const char * argv[]) {
int c;
state = 0;
c1 = 0;
c2 = 0;
while ((c=getchar())!=EOF) {
c1 = c2;
c2 = c;
change_state(c);
}
if ( (0)==1) {
printf("just test://abcd");
printf("just test:/*hello*/");
}
}
void change_state(int c){
if (state==0) {
if (c=='/') {
state = 1;
}else if (c=='"'){
state = 5;
putchar(c);
}else if (c=='\''){
state = 6;
putchar(c);
}
else{
state = 0;
putchar(c);
}
}else if (state==1) {
if (c=='/') {
state = 2;
}else if (c=='*'){
state = 3;
}else{
state = 0;
putchar(c1);
putchar(c);
}
}else if (state==2) {
if (c=='\n') {
state = 0;
putchar(c);
}else{
state = 2;
}
}else if (state==3) {
if (c=='*') {
state = 4;
}else{
state = 3;
}
}else if (state==4) {
if (c=='/') {
state = 0;
}else{
state = 3;
}
}else if (state==5){
if (c=='"') {
state = 0;
putchar(c);
}else if(c=='\\'){
state = 7;
putchar(c);
}else{
state = 5;
putchar(c);
}
}else if (state==6){
if (c=='\'') {
state = 0;
putchar(c);
}else if(c=='\\'){
state = 8;
putchar(c);
}else{
state = 6;
putchar(c);
}
}else if (state==7){
state = 5;
putchar(c);
}else if (state==8){
state = 6;
putchar(c);
}
perfect!
感谢@roma823 及其文章:http://blog.csdn.net/roma823/article/details/6364849