去除代码中的注释

Fatunlorey

已于 2024-03-31 21:03:55 修改

阅读量864

点赞数 20

分类专栏：编译原理　文章标签：c++

于 2024-03-27 23:06:50 首次发布

本文链接：https://blog.csdn.net/Fatunlorey/article/details/137092660

版权

编译原理专栏收录该内容

1 篇文章 0 订阅

订阅专栏

在编译原理中，源代码在转换为机器码之前，会经历一系列的处理步骤，其中之一就是预处理。预处理主要负责处理源代码文件中的预处理指令和文本替换工作，这包括宏定义的展开、文件包含的处理以及注释的删除等。具体到去除代码注释，这是预处理过程中的一个重要环节，其目的是清理掉程序中不会影响程序执行的辅助性文本，即注释部分，以便于后续的词法分析、语法分析和代码生成等步骤能够顺利进行。

方法一：直接实现

考虑C++中的注释：
1.单行注释：//
2.多行注释：/* */
3.引号中的注释符号应该被视为字符串(直接去实现这里比较复杂，dfa方法解决了这一问题)

#include <iostream>
#include <fstream>
#include <string>
using namespace std;

/**
 * Copies `in` to `out` one line at a time, dropping C++ comments.
 *
 * - `// ...` comments are removed up to the end of the line.
 * - Block comments (slash-star ... star-slash) are removed, including ones
 *   that span several lines. No newline is written for a line that ends
 *   while still inside a block comment, so the code before and after such
 *   a comment ends up on one output line.
 * - Comment markers inside "..." and '...' literals are copied verbatim;
 *   a backslash escapes the following character, so \" does not close a
 *   string.
 *
 * Literal tracking is reset at every line start, so a literal continued
 * onto the next line with a trailing backslash is not recognised, and
 * neither are raw string literals.
 *
 * @param in   stream to read the source text from
 * @param out  stream that receives the comment-free text
 */
void remove_comments(std::ifstream& in, std::ofstream& out) {
    std::string line;
    bool multiline_comment = false;  // true while inside an unterminated block comment

    while (std::getline(in, line)) {
        // Delimiter of the literal currently being copied, or '\0' when
        // the scanner is outside of any literal.
        char quote = '\0';
        const std::size_t len = line.length();
        std::size_t pos = 0;

        while (pos < len) {
            const char cur = line[pos];
            // '\0' stands in for "no next character" at the end of the line.
            const char next = (pos + 1 < len) ? line[pos + 1] : '\0';

            if (multiline_comment) {
                if (cur == '*' && next == '/') {
                    multiline_comment = false;
                    pos += 2;
                } else {
                    ++pos;  // swallow the interior of the block comment
                }
            } else if (quote != '\0') {
                out << cur;
                if (cur == '\\' && pos + 1 < len) {
                    // Copy the escaped character as well so that an
                    // escaped quote cannot terminate the literal.
                    out << next;
                    pos += 2;
                    continue;
                }
                if (cur == quote) {
                    quote = '\0';
                }
                ++pos;
            } else if (cur == '/' && next == '*') {
                multiline_comment = true;
                pos += 2;
            } else if (cur == '/' && next == '/') {
                break;  // the rest of the line is a single-line comment
            } else {
                if (cur == '"' || cur == '\'') {
                    quote = cur;
                }
                out << cur;
                ++pos;
            }
        }

        if (!multiline_comment) {
            out << '\n';  // same byte as endl, without forcing a flush per line
        }
    }
}

/**
 * Strips the comments from a fixed input file and writes the result to a
 * fixed output file.
 *
 * @return 0 on success; 1 when the input cannot be opened, the output
 *         cannot be created, or writing the output fails.
 */
int main() {
    std::ifstream in("C:\\Users\\86183\\Desktop\\大二下考试\\编译原理\\input.cpp");
    if (!in.is_open()) {
        std::cerr << "Failed to open the input file." << std::endl;
        return 1;
    }

    // Opened only after the input is known to exist, so a missing input
    // no longer truncates or creates the output file.
    std::ofstream out("C:\\Users\\86183\\Desktop\\大二下考试\\编译原理\\output.cpp");
    if (!out.is_open()) {
        std::cerr << "Failed to open the output file." << std::endl;
        return 1;
    }

    remove_comments(in, out);

    out.flush();
    if (!out) {
        std::cerr << "Failed to write the output file." << std::endl;
        return 1;
    }

    // Both streams are closed by their destructors.
    return 0;
}

方法二：使用有穷自动机(DFA)

该方法参考自这篇文章：http://t.csdnimg.cn/6hMPA

另外不懂DFA的原理的小伙伴可以看一下这篇文章：http://t.csdnimg.cn/m7iCB

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std;

// States of the comment-stripping automaton used by remove_comments().
enum states
{
    S0 = 0, // ordinary code
    S1,     // inside a "..." string literal
    S2,     // inside a '...' character literal
    S3,     // a '/' was read; it may or may not start a comment
    S4,     // inside a block comment
    S5,     // inside a block comment, the previous character was '*'
    S6,     // inside a // line comment
    S7,     // inside a string literal, right after a backslash
    S8      // inside a character literal, right after a backslash
};

/**
 * Copies `in` to `out` with C++ comments removed, using a small finite
 * automaton (see `states`).
 *
 * The whole input is read into memory first and then scanned one
 * character at a time:
 *
 * - Text inside "..." and '...' literals is copied verbatim, so comment
 *   markers inside literals are preserved. A backslash escapes the next
 *   character, so \" and \' do not end the literal.
 * - A '/' is held back until the following character shows whether it
 *   starts a comment. If it does not, the '/' is written and that
 *   character is processed again as ordinary code.
 * - A // comment is dropped up to, but not including, its newline.
 * - A block comment is dropped entirely; nothing is written in its place.
 *   Closers preceded by extra stars (such as a comment made only of stars)
 *   are recognised.
 *
 * Raw string literals and backslash-continued // comments are not handled.
 *
 * @param in   stream to read the source text from
 * @param out  stream that receives the comment-free text
 */
void remove_comments(std::ifstream& in, std::ofstream& out)
{
    // Read the complete file into a string.
    std::string fileContents;
    char c;
    while (in.get(c)) {
        fileContents += c;
    }

    states curState = S0;
    const std::size_t size = fileContents.size();
    std::size_t i = 0;
    while (i < size)
    {
        const char ch = fileContents[i];
        switch (curState)
        {
            case S0:
                if (ch == '/')
                {
                    // Not known yet whether this is a comment; the '/' is
                    // written later if it turns out to be plain code.
                    curState = S3;
                }
                else
                {
                    if (ch == '"')
                        curState = S1;
                    else if (ch == '\'')
                        curState = S2;
                    out << ch;
                }
                break;

            case S1:
                out << ch;
                if (ch == '\\')
                    curState = S7;
                else if (ch == '"')
                    curState = S0;
                break;

            case S2:
                out << ch;
                if (ch == '\\')
                    curState = S8;
                else if (ch == '\'')
                    curState = S0;
                break;

            case S3:
                if (ch == '*')
                {
                    curState = S4;
                }
                else if (ch == '/')
                {
                    curState = S6;
                }
                else
                {
                    // The held-back '/' was ordinary code (e.g. division).
                    // Emit it and re-run this character through S0 so that
                    // a quote right after '/' still opens a literal.
                    out << '/';
                    curState = S0;
                    continue; // deliberately skips ++i
                }
                break;

            case S4:
                if (ch == '*')
                    curState = S5;
                break;

            case S5:
                if (ch == '/')
                    curState = S0;
                else if (ch != '*')
                    curState = S4;
                // On '*' stay in S5: a run of stars may still be followed
                // by the closing '/'.
                break;

            case S6:
                if (ch == '\n')
                {
                    curState = S0;
                    out << '\n'; // the newline itself is not part of the comment
                }
                break;

            case S7:
                out << ch; // escaped character inside a string literal
                curState = S1;
                break;

            case S8:
                out << ch; // escaped character inside a character literal
                curState = S2;
                break;

            default:
                break;
        }
        ++i;
    }

    // The input ended right after a '/' that never became a comment.
    if (curState == S3)
    {
        out << '/';
    }
}

/**
 * Runs the automaton-based comment stripper on a fixed input file and
 * writes the result to a fixed output file.
 *
 * @return 0 on success; 1 when the input cannot be opened, the output
 *         cannot be created, or writing the output fails.
 */
int main() {
    std::ifstream in("C:\\Users\\86183\\Desktop\\大二下考试\\编译原理\\input.cpp", std::ios_base::in);
    if (!in.is_open()) {
        std::cerr << "Failed to open the input file." << std::endl;
        return 1;
    }

    // Opened only after the input is known to exist, so a missing input
    // no longer truncates or creates the output file.
    std::ofstream out("C:\\Users\\86183\\Desktop\\大二下考试\\编译原理\\output.cpp");
    if (!out.is_open()) {
        std::cerr << "Failed to open the output file." << std::endl;
        return 1;
    }

    std::cout << "File contents without comments" << std::endl;
    remove_comments(in, out);

    out.flush();
    if (!out) {
        std::cerr << "Failed to write the output file." << std::endl;
        return 1;
    }

    // Both streams are closed by their destructors.
    return 0;
}

喜欢的小伙伴还请点赞收藏，(❁´◡`❁)(　o=^•ェ•)o　┏━┓

Fatunlorey

关注

20
点赞
踩
16

收藏

觉得还不错? 一键收藏
0
评论
去除代码中的注释

在编译原理中，源代码在转换为机器码之前，会经历一系列的处理步骤，其中之一就是预处理。预处理主要负责处理源代码文件中的预处理指令和文本替换工作，这包括宏定义的展开、文件包含的处理以及注释的删除等。具体到去除代码注释，这是预处理过程中的一个重要环节，其目的是清理掉程序中不会影响程序执行的辅助性文本，即注释部分，以便于后续的词法分析、语法分析和代码生成等步骤能够顺利进行。
复制链接

扫一扫