使用有穷自动机(DFA)去除C/C++/Java的代码注释

该文章介绍了一个使用DFA(有穷自动机)来移除C/C++/Java代码中的注释的方法。代码已上传至Github,程序通过定义不同的状态来处理字符串,Buffer结构体用于管理S0和S3状态间的字符串。程序读取输入文件,输出去除了注释的新文件。
摘要由CSDN通过智能技术生成

使用有穷自动机(DFA)去除C/C++/Java的代码注释 (C语言实现)

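DFA 及其状态转换见原文配图(此处未保留图片)。按照下面代码的实现, 各状态的含义是: S0 为普通代码, S1 为字符串字面量内部, S2 为字符字面量内部, S3 为刚读到一个 '/', S4 为块注释内部, S5 为块注释中刚读到 '*', S6 为行注释内部。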

完整的代码已上传至 Github。如果对你有帮助, 麻烦点个赞。

在放代码之前, 先放代码运行的效果
在这里插入图片描述
下面是代码, 代码不长。需要说明的是, Buffer 结构体的设计是为了方便处理 S0 和 S3 状态互相转换时暂存的字符串。除了 S0->S3->S0 这条路径以外, 其它时候 Buffer 中装的就是注释的内容。

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define BUFFER_SIZE 1024

/* States of the comment-stripping automaton, as used by main(). */
enum States {
    S0 = 0, /* ordinary code */
    S1 = 1, /* entered after a double quote */
    S2 = 2, /* entered after a single quote */
    S3 = 3, /* a '/' was just read; may start a comment */
    S4 = 4, /* entered after the opening of a block comment */
    S5 = 5, /* a '*' was just read while in S4 */
    S6 = 6  /* entered after the opening of a line comment */
};
/*
 * Fixed-capacity byte buffer.
 * `buf` holds the stored bytes; `ptr` is the index one past the last
 * stored byte, so it is also the number of bytes currently held.
 */
typedef struct {
    char buf[BUFFER_SIZE];
    int  ptr;
} Buffer;

/*
 * Resets `buffer` to the empty state: the write index goes back to 0 and
 * every byte of the storage is cleared to '\0'.
 *
 * Returns 0 on success, or -1 when `buffer` is NULL.
 */
int initBuffer(Buffer *buffer) {
    if (buffer == NULL) {
        return -1;
    }
    buffer->ptr = 0;
    /* Size taken from the array itself so it cannot drift from the struct. */
    memset(buffer->buf, '\0', sizeof buffer->buf);
    return 0;
}
/*
 * Appends the byte `c` at the current write index of `buffer` and advances
 * the index by one.
 *
 * Returns 0 when the byte was stored. Returns -1, leaving the buffer
 * untouched, when `buffer` is NULL or has no room left, so an over-long
 * input can never write past the end of the storage array.
 */
int appendBuffer(Buffer *buffer, char c) {
    if (buffer == NULL || buffer->ptr < 0 ||
        (size_t)buffer->ptr >= sizeof buffer->buf) {
        return -1;
    }
    buffer->buf[buffer->ptr++] = c;
    return 0;
}
/* Returns 1 when `buffer` holds no bytes (write index is 0), otherwise 0. */
int isBufferEmpty(Buffer *buffer){
    return (buffer->ptr == 0) ? 1 : 0;
}

int main(int argc, char const *argv[])
{
    const char* inputFile= "./intro.c"; // 需要移除注释的文件
    const char* outputFile = "./intro-no-comment.c"; // 生成的文件

    size_t sizeOfBuffer = sizeof(Buffer);
    Buffer* buffer = malloc(sizeof(Buffer));
    initBuffer(buffer);
    
    int res;
    printf("open: %s, generate file: %s \n", inputFile, outputFile);
    FILE* inputFp = fopen(inputFile,"r");
    if(inputFile==NULL) {
        printf("open file %s failed!", inputFile);
    }
    
    FILE* outFp = fopen(outputFile, "w");
    int c;
    enum States currState = S0; 
    enum States lastState = S0; 
    while((c = getc(inputFp))!=EOF) {
        switch (currState)
        {
            case S0:
            {
                lastState = currState;
                if (c=='\"') {
                    currState = S1;
                    putc(c, outFp);
                } else if (c=='\'')
                {
                    currState = S2;
                    putc(c, outFp);
                } else if (c=='/')
                {
                    currState = S3;
                    appendBuffer(buffer,c);
                } else {
                    putc(c, outFp);
                }
                break;
            }

            case S1:
            {
                lastState = currState;
                putc(c, outFp);
                if (c=='\"') {
                    currState = S0;
                } else {
                    currState = S1;
                }
                break;
            }

            case S2:
            {
                lastState = currState;
                putc(c, outFp);
                if (c=='\'') {
                    currState = S0;
                } else {
                    currState = S2;
                }
                break;
            }

            case S3:
            {
                lastState = currState;
                appendBuffer(buffer,c);
                if (c=='/') {
                    currState = S6;
                } else if (c=='*')
                {
                    currState = S4;
                } else {
                    // 非注释
                    currState = S0;
                }
                break;
            }

            case S4:
            {
                lastState = currState;
                appendBuffer(buffer,c);
                if (c=='*') {
                    currState = S5;
                } else {
                    currState = S4;
                }
                break;
            }

            case S5:
            {
                lastState = currState;
                appendBuffer(buffer,c);
                if (c=='/') {
                    currState = S0;
                } else {
                    currState = S4;
                }
                break;
            }

            case S6:
            {
                lastState = currState;
                appendBuffer(buffer,c);
                if (c=='\n') {
                    currState = S0;
                } else {
                    currState = S6;
                }
                break;
            }
        
            default:
                break;
        }
        if (currState==S0 && !isBufferEmpty(buffer)) {
            if (lastState==S3) {
                // 把buffer中的写入
                for(int i = 0;i<buffer->ptr;i++) {
                    putc(buffer->buf[i], outFp);
                }
                initBuffer(buffer);
            } else if (lastState==S6) {
                putc('\n', outFp);
                initBuffer(buffer);
            } else if (lastState==S5) {
                initBuffer(buffer);
            }
        }
    }
    
    printf("Done! \n");

    res = fclose(inputFp);
    res = fclose(outFp);
    return 0;
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值