#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFF_LEN 20000
// Scans the first `size` bytes of `buf` and overwrites the // line comments
// and /* ... */ block comments it recognizes with spaces, in place. Comment
// markers that appear inside single- or double-quoted text are not treated
// as comments.
void remove_commment(char *buf, size_t size);
// Entry point. Reads at most BUFF_LEN - 1 bytes from the file named by
// argv[1], blanks out its comments with remove_commment() and prints the
// resulting text. Anything in the file beyond BUFF_LEN - 1 bytes is ignored.
//
// Returns 0 on success (and also when the argument count is wrong, after
// printing a usage hint); returns -1 when the file cannot be opened or
// nothing could be read from it.
int main(int argc, char *argv[]) {
    if (argc != 2) {
        puts("need a arg:c or cpp file");
        return 0;
    }

    FILE *p = fopen(argv[1], "r");
    if (p == NULL) {
        puts("null ptr");
        return -1;
    }

    char buf[BUFF_LEN];
    // fread returns size_t; keep it unsigned so it can be used directly as a
    // length and as an index into buf.
    size_t read_len = fread(buf, 1, BUFF_LEN - 1, p);
    fclose(p); // the stream is not needed once the data is in buf

    printf("n:%zu\n", read_len);
    if (read_len == 0) {
        return -1;
    }

    // Terminate BEFORE scanning: the byte just past the data must hold a
    // defined value in case the scanner looks one character ahead.
    buf[read_len] = '\0';
    remove_commment(buf, read_len);

    printf("-----code-----\n%s\n", buf);
    return 0;
}
// Blanks out C/C++ comments in buf[0 .. size) in place.
//
// Every byte of a // line comment or a terminated block comment is replaced
// with a space, so the buffer length and the offsets of the remaining code do
// not change.
//   - Line comments: the terminating '\n' (and a '\r' directly before it) is
//     kept. A line comment that runs to the end of the input is blanked too.
//   - Block comments: the whole span including its delimiters and any
//     newlines inside it is blanked. A block comment that is never closed is
//     left untouched.
//   - Comment markers inside '...' or "..." are ignored. A backslash inside
//     quotes always escapes the following byte, so "\\" and '\'' end where
//     they should.
//
// Parameters:
//   buf  - text to process; modified in place. NULL is accepted and ignored.
//   size - number of valid bytes in buf. No byte at or beyond buf + size is
//          read or written.
//
// Not handled: backslash-newline continuation of a // comment and C++ raw
// string literals.
void remove_commment(char *buf, size_t size) {
    enum {
        IN_CODE,
        IN_CHAR_LITERAL,
        IN_STRING_LITERAL,
        IN_LINE_COMMENT,
        IN_BLOCK_COMMENT
    } state = IN_CODE;
    char *comment_start = NULL; // first byte of the comment being scanned
    char *p;
    char *end;

    if (buf == NULL) {
        return;
    }

    p = buf;
    end = buf + size;

    while (p < end) {
        char c = *p;
        // Look-ahead is only legal while it stays inside the buffer.
        int has_next = (p + 1 < end);

        switch (state) {
        case IN_CODE:
            if (c == '\'') {
                state = IN_CHAR_LITERAL;
                p++;
            } else if (c == '"') {
                state = IN_STRING_LITERAL;
                p++;
            } else if (c == '/' && has_next && p[1] == '/') {
                comment_start = p;
                state = IN_LINE_COMMENT;
                p += 2;
            } else if (c == '/' && has_next && p[1] == '*') {
                comment_start = p;
                state = IN_BLOCK_COMMENT;
                // Step over both bytes so that "/*/" is not read as an
                // opener immediately followed by a closer.
                p += 2;
            } else {
                p++; // ordinary code, including a lone '/' (division)
            }
            break;

        case IN_CHAR_LITERAL:
        case IN_STRING_LITERAL:
            if (c == '\\') {
                // Escape sequence: skip the escaped byte as well so that an
                // escaped quote or an escaped backslash cannot be mistaken
                // for the end of the literal.
                p += has_next ? 2 : 1;
            } else {
                char closer = (state == IN_CHAR_LITERAL) ? '\'' : '"';
                if (c == closer) {
                    state = IN_CODE;
                }
                p++;
            }
            break;

        case IN_LINE_COMMENT:
            if (c == '\n') {
                // Keep the line ending intact: stop before "\n", or before
                // "\r\n" when the line ends with CR LF. p[-1] is valid here
                // because p is at least two bytes past comment_start.
                char *stop = (p[-1] == '\r') ? p - 1 : p;
                memset(comment_start, ' ', (size_t)(stop - comment_start));
                state = IN_CODE;
            }
            p++;
            break;

        case IN_BLOCK_COMMENT:
            if (c == '*' && has_next && p[1] == '/') {
                p += 2;
                memset(comment_start, ' ', (size_t)(p - comment_start));
                state = IN_CODE;
            } else {
                p++;
            }
            break;

        default:
            p++; // not reachable; guarantees the scan always advances
            break;
        }
    }

    // A line comment on the last line may have no terminating newline.
    if (state == IN_LINE_COMMENT) {
        memset(comment_start, ' ', (size_t)(end - comment_start));
    }
}