之前有人跟我讨论 C++ 如何去掉注释,他强调要考虑遇到转义字符时怎么处理。我想了半天,觉得只要识别出注释的开始标记,后面似乎就不需要再考虑转义字符了?
而且如果只是识别注释,还可以一次读 2 个字符。
这里我分享一下我的想法,大家看看。
一次读1字符的状态机如下:
实现的代码如下:
#pragma once
#include <string>
#include <tuple>
#include <iostream>
using namespace std;
/// States of the comment-scanning state machine driven by GetNextCommnet().
enum LEX_STAT {
    STAT_0,  // Ordinary code; no comment opener seen.
    STAT_1,  // Saw a single '/'; it may open a comment or be a division.
    STAT_11, // Inside a "//" line comment.
    STAT_12, // Inside a "/*" block comment.
    STAT_13, // Inside a block comment, right after a '*' that may close it.
};

// Scanner state of the most recent GetNextCommnet() call (reset on every call).
LEX_STAT curStat = STAT_0;
// Cleared by every GetNextCommnet() call; not otherwise used by the code visible here.
std::string curString = "";

// If `p` points at a backslash immediately followed by a line break ("\n" or
// "\r\n"), returns the position just past that line break; otherwise nullptr.
static const char *PastLineSplice(const char *p)
{
    if (*p != '\\')
        return nullptr;
    const char *q = p + 1;
    if (*q == '\r')
        ++q;
    return (*q == '\n') ? q + 1 : nullptr;
}

// Skips a quoted literal. `p` points at the opening quote (' or "). Returns the
// position just past the matching closing quote, or the position of the line
// break / terminating NUL if the literal is not closed on its (spliced) line.
static const char *SkipQuotedLiteral(const char *p)
{
    const char quote = *p++;
    while (*p != '\0' && *p != '\n')
    {
        if (*p == '\\')
        {
            // An escape hides the next character (so \" does not close the
            // literal); a backslash-newline continues it on the next line.
            const char *spliced = PastLineSplice(p);
            if (spliced != nullptr)
                p = spliced;
            else
                p += (p[1] != '\0') ? 2 : 1;
            continue;
        }
        if (*p == quote)
            return p + 1;
        ++p;
    }
    return p;
}

// True for ASCII letters and digits (locale-independent).
static bool IsAsciiAlnum(char c)
{
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// Heuristic: true when the apostrophe at `p` looks like a digit separator
// (as in 1'000) rather than the start of a character literal. `buffer` is the
// lowest address that may be inspected.
static bool IsDigitSeparator(const char *buffer, const char *p)
{
    return p != buffer && IsAsciiAlnum(p[-1]) && IsAsciiAlnum(p[1]) && p[2] != '\'';
}

/// Finds the first C/C++ comment in the NUL-terminated text `buffer`.
///
/// @param buffer  Text to scan; scanning starts in the "ordinary code" state.
///                May be nullptr, which is treated as empty input.
/// @return (start, length). `start` points at the '/' that opens the comment
///         and `length` is the number of characters in it, including the
///         closing "*/" of a block comment or the terminating '\n' of a line
///         comment. A comment cut short by the end of the input is returned up
///         to the end of the input. When there is no comment the result is
///         (nullptr, 0).
///
/// String and character literals are skipped, so "//" or "/*" inside a literal
/// is not reported, and a backslash-newline continues a "//" comment onto the
/// next line. Raw string literals are scanned like ordinary string literals.
std::tuple<const char *, int> GetNextCommnet(const char * buffer)
{
    typedef std::tuple<const char *, int> Result;

    curString = "";
    curStat = STAT_0;
    if (buffer == nullptr)
        return Result(nullptr, 0);

    const char *p = buffer;
    const char *start = nullptr;
    for (;;)
    {
        switch (curStat)
        {
        case STAT_0:
            // Any '/' remembered from an earlier pass turned out not to open
            // a comment, so forget it.
            start = nullptr;
            while (*p != '\0' && *p != '/')
            {
                if (*p == '"' || (*p == '\'' && !IsDigitSeparator(buffer, p)))
                    p = SkipQuotedLiteral(p);
                else
                    ++p;
            }
            if (*p == '\0')
                return Result(nullptr, 0);
            start = p++;
            curStat = STAT_1;
            break;

        case STAT_1:
            if (*p == '/')
            {
                curStat = STAT_11;
                ++p;
            }
            else if (*p == '*')
            {
                curStat = STAT_12;
                ++p;
            }
            else
            {
                // Division or some other use of '/'; STAT_0 also handles the
                // end of the input. `p` is not advanced so the character is
                // re-examined as code.
                curStat = STAT_0;
            }
            break;

        case STAT_11:
            while (*p != '\0' && *p != '\n')
            {
                const char *spliced = PastLineSplice(p);
                if (spliced != nullptr)
                    p = spliced; // backslash-newline: comment continues
                else
                    ++p;
            }
            if (*p == '\n')
                ++p; // the line break is reported as part of the comment
            curStat = STAT_0;
            return Result(start, static_cast<int>(p - start));

        case STAT_12:
            while (*p != '\0' && *p != '*')
                ++p;
            if (*p == '\0')
                return Result(start, static_cast<int>(p - start)); // unterminated
            ++p;
            curStat = STAT_13;
            break;

        case STAT_13:
            if (*p == '/')
            {
                ++p;
                curStat = STAT_0;
                return Result(start, static_cast<int>(p - start));
            }
            // Not a closer. `p` is left in place so that a run of stars such
            // as "***/" is re-checked star by star.
            curStat = STAT_12;
            break;

        default:
            std::cout << "Error stat!" << std::endl;
            return Result(p, 0);
        }
    }
}
测试代码如下:
// Demo driver: walks a sample snippet with GetNextCommnet() and prints every
// comment it reports, one per output line, until a zero length is returned.
void testComment()
{
    const std::string sample =
        "start_code();\r\n"
        "/* First comment ***/\r\n"
        "more_code();\r\n"
        "// a splash commnet \r\n\r\n"
        "/* Second comment */\r\n"
        "end_code(); \r\n"
        "double temp = round / pi;\r\n";

    const char *cursor = sample.c_str();
    for (;;)
    {
        const char *found = nullptr;
        int length = 0;
        std::tie(found, length) = GetNextCommnet(cursor);

        // A zero length means there is nothing further to report.
        if (length == 0)
            break;

        if (length > 0)
        {
            const std::string text(found, length);
            std::cout << text.c_str() << std::endl;
        }

        // Resume scanning right after the reported span.
        cursor = found + length;
    }
}