markdown替换所有\def命令

zsc_118
已于 2024-08-08 21:06:59 修改
阅读量400
点赞数 3
文章标签： word markdown pandoc
于 2024-05-10 12:42:53 首次发布
本文链接：https://blog.csdn.net/m0_61219098/article/details/138647305
版权
最近在用pandoc将markdown转word的时候发现，如果存在\def命令，pandoc不会自动替换，导致导出的word中大量数学公式无法转换。因此我写了如下C++程序，可以预处理markdown文件并替换一些pandoc无法识别的命令。（我写的markdown文件为GBK编码的；如果是UTF-8编码，请注释掉main函数末尾的gbk2utf8(s);这一行。）
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <windows.h>
using namespace std;
/**
 * Reads the entire file at path `p` into `s`.
 *
 * @param s  Receives the file contents; left untouched when the file cannot be opened.
 * @param p  Path of the file to read (opened in text mode).
 * @return   true when the file could NOT be opened, false on success.
 */
bool file2str(string &s, const char *p)
{
    ifstream in(p);
    if (in.is_open())
    {
        // Stream the whole file through its buffer into an in-memory string.
        ostringstream contents;
        contents << in.rdbuf();
        s = contents.str();
        return false;
    }
    return true;
}
/**
 * Converts `strGBK` in place from the system ANSI code page (CP_ACP) to UTF-8.
 *
 * The conversion goes through UTF-16 using the Win32 calls MultiByteToWideChar
 * and WideCharToMultiByte. If either call reports failure (returns 0), the
 * string is left unchanged instead of being replaced by garbage.
 *
 * @param strGBK  Input text in the ANSI code page; receives the UTF-8 text.
 */
void gbk2utf8(string &strGBK)
{
    if (strGBK.empty())
        return;

    const int srcLen = static_cast<int>(strGBK.size());

    // First call asks for the required number of wide characters.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, strGBK.data(), srcLen, nullptr, 0);
    if (wideLen <= 0)
        return;
    wstring wide(static_cast<size_t>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, strGBK.data(), srcLen, &wide[0], wideLen) <= 0)
        return;

    // Same two-step pattern for UTF-16 -> UTF-8.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideLen, nullptr, 0, nullptr, nullptr);
    if (utf8Len <= 0)
        return;
    string utf8(static_cast<size_t>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideLen, &utf8[0], utf8Len, nullptr, nullptr) <= 0)
        return;

    strGBK.swap(utf8);
}
/// Returns true when `c` is an ASCII letter (a-z or A-Z).
/// A function rather than a macro, so it composes safely with `!`, `&&` and `?:`.
constexpr bool isAlpha(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/// Returns true when `c` is NOT an ASCII letter; the exact negation of isAlpha.
constexpr bool isnAlpha(char c)
{
    return !isAlpha(c);
}
/**
 * Interactive entry point.
 *
 * Prompts for a markdown input path and an output path, then rewrites the
 * markdown text in four passes before handing it to pandoc:
 *
 *   1. Every `\def\name...{body}` is removed and the uses of `\name` are
 *      expanded in place (with `#1`, `#2`, ... substituted when the
 *      definition declares parameters).
 *   2. `\op\limits_x` / `\op\limits^x` is rewritten to `\underset{x}{\op}` /
 *      `\overset{x}{\op}`.
 *   3. A fixed list of command names is replaced or erased.
 *   4. Inside `$$\n ... \n$$` blocks, every `\\` that is not inside a
 *      `\begin ... \end` pair is turned into ` $$\n$$ `, splitting the block.
 *
 * The result is converted with gbk2utf8, written to `<output>.md`, and
 * `pandoc -i <output>.md -o <output>` is run.
 *
 * The scanners do not validate their input: an unterminated `{` or a
 * definition cut off by the end of the file is not detected.
 *
 * @return 1 when the input file cannot be opened or the intermediate `.md`
 *         file cannot be created, 0 otherwise.
 */
int main()
{
    string s, s1, *C, *D, input, output;
    cout << "请输入markdown文件路径：";
    cin >> input;
    cout << "请输入word文件路径：";
    cin >> output;
    if (file2str(s, input.c_str()))
    {
        printf("文件打开失败!");
        return 1;
    }
    size_t n, m, k(0), l, d;
    uint8_t c, t, a;
    char A[5]; // scratch for "#N", "{x}" and "{x}{": at most 4 characters plus NUL
    string::iterator p, q, r;

    // True when the backslash at s[pos] is the second half of a `\\` pair, i.e.
    // it is preceded by an odd number of consecutive backslashes. A backslash
    // at index 0 has no predecessors and therefore always starts a command.
    const auto isEscapedBackslash = [&s](size_t pos) -> bool
    {
        size_t run = 0;
        while (run < pos && s[pos - 1 - run] == '\\')
            ++run;
        return (run & 1) != 0;
    };

    // ---- Pass 1: expand \def macros -------------------------------------
    while ((k = s.find("\\def", k)) != string::npos)
    {
        // Only `\def` immediately followed by a backslash is treated as a definition.
        if (*(q = (r = s.begin() + k) + 4) != '\\')
        {
            ++k;
            continue;
        }
        // O = the macro name including its leading backslash.
        ++(p = q);
        while (isAlpha(*p))
            ++p;
        string O(q, p);
        // c = number of '#' characters between the name and the body's '{'.
        c = 0;
        while (*p != '{')
        {
            if (*p == '#')
                ++c;
            ++p;
        }
        // Find the '}' matching the body's opening '{'; N = the body text.
        q = p;
        t = 1;
        do
            if (*++q == '{')
                ++t;
            else if (*q == '}')
                --t;
        while (t);
        string N(++p, q);
        s.erase(r, ++q); // drop the whole definition from the text
        if (c)
        {
            // Macro with parameters. Processed text accumulates in s1 while the
            // already-handled prefix is cut off s, so every search starts at 0.
            s1 = "";
            D = C = new string[c]; // C[i] holds argument i+1 wrapped in braces
            while ((n = s.find(O)) != string::npos)
            {
                if (isEscapedBackslash(n))
                {
                    // Not a command start: move everything up to and including
                    // this backslash to the output so the next search advances.
                    s1.append(s, 0, n + 1);
                    s.erase(0, n + 1);
                    continue;
                }
                m = O.length();
                s1.append(s.begin(), s.begin() + n);
                s.erase(0, n);
                p = s.begin() + m;
                if (isAlpha(*p))
                {
                    // O is only a prefix of a longer command name; step past its backslash.
                    s1.push_back(s.front());
                    s.erase(s.begin());
                    continue;
                }
                // First argument. Every branch leaves p on the last character it consumed.
                if (*p == '{')
                {
                    q = p;
                    a = 1;
                    do
                        if (*++p == '{')
                            ++a;
                        else if (*p == '}')
                            --a;
                    while (a);
                    *C = string(q, p + 1);
                }
                else if (*p == '\\')
                {
                    q = p;
                    ++p;
                    while (isAlpha(*p))
                        ++p;
                    *C = '{' + string(q, p) + '}';
                    // p is one past the control word. A single following space
                    // is consumed with it; otherwise step back onto the word.
                    if (*p != ' ')
                        --p;
                }
                else
                {
                    if (*p == ' ')
                        ++p;
                    snprintf(A, sizeof A, "{%c}", *p);
                    *C = A;
                }
                // Remaining arguments, stored at C[1], C[2], ... through D.
                t = c;
                while (--t)
                    if (*++p == '{')
                    {
                        q = p;
                        a = 1;
                        do
                            if (*++p == '{')
                                ++a;
                            else if (*p == '}')
                                --a;
                        while (a);
                        *++D = string(q, p + 1);
                    }
                    else if (*p == '\\')
                    {
                        q = p;
                        ++p;
                        while (isAlpha(*p))
                            ++p;
                        // Store into the next slot (not *C, which is argument 1).
                        *++D = '{' + string(q, p) + '}';
                        if (*p != ' ')
                            --p;
                    }
                    else
                    {
                        snprintf(A, sizeof A, "{%c}", *p);
                        *++D = A;
                    }
                m = (p - s.begin()) + 1; // length of the macro use including its arguments
                string N1(N);
                ++D;
                // Substitute from the highest parameter number down to #1;
                // D ends back at C, ready for the next use.
                do
                {
                    snprintf(A, sizeof A, "#%d", static_cast<int>(D - C));
                    --D;
                    n = 0;
                    l = D->length();
                    while ((n = N1.find(A, n)) != string::npos)
                    {
                        N1.replace(n, strlen(A), *D);
                        n += l;
                    }
                } while (D != C);
                s1.append(N1);
                s.erase(0, m);
            }
            delete[] C;
            s = s1 + s;
        }
        else
        {
            // Macro without parameters: plain in-place replacement.
            n = 0;
            m = O.length();
            l = N.length();
            while ((n = s.find(O, n)) != string::npos)
            {
                if (isEscapedBackslash(n))
                {
                    ++n;
                    continue;
                }
                q = (p = s.begin() + n) + m;
                if (q != s.end() && (isAlpha(*q)))
                {
                    // O is only a prefix of a longer command name.
                    ++n;
                    continue;
                }
                s.replace(p, q, N);
                n += l;
            }
        }
    }

    // ---- Pass 2: \op\limits_x / ^x  ->  \underset{x}{\op} / \overset{x}{\op} ----
    n = 0;
// Skip the current match and keep searching after it.
#define NEXT_CIRC \
    {             \
        ++n;      \
        continue; \
    }
    while ((n = s.find("\\limits", n)) != string::npos)
    {
        q = (p = s.begin() + n) + 7;
        if (q != s.end() && (isAlpha(*q)))
            NEXT_CIRC
        // Walk back over letters to the backslash that starts the operator name.
        r = p;
        while (r != s.begin())
        {
            --r;
            if (isnAlpha(*r))
                break;
        }
        if (*r != '\\')
            NEXT_CIRC
        s1 = string(r, p); // the operator, e.g. "\sum"
        c = *q;
        if (c != '_' && c != '^')
            NEXT_CIRC
    F_limits:
        // q is on '_' or '^'; wrap the script that follows around s1.
        p = ++q;
        if (*p == '{')
        {
            t = 1;
            do
                if (*++q == '}')
                    --t;
                else if (*q == '{')
                    ++t;
            while (t);
            s1 = string(p, ++q) + '{' + s1 + '}';
        }
        else if (*p == '\\')
        {
            ++q;
            while (isAlpha(*q))
                ++q;
            s1 = '{' + string(p, q) + "}{" + s1 + '}';
        }
        else
        {
            snprintf(A, sizeof A, "{%c}{", *p);
            s1 = A + s1 + '}';
            ++q;
        }
        s1 = (c == '_' ? "\\underset" : "\\overset") + s1;
        c = *q;
        if (c != '_' && c != '^')
        {
            s.replace(r, q, s1);
            continue;
        }
        goto F_limits; // a second script follows: wrap once more
    }
#undef NEXT_CIRC

    // ---- Pass 3: fixed command replacements ------------------------------
// Replace every occurrence of O that is not directly followed by a letter with N.
// (A variant without that check would also rewrite prefixes of longer command names.)
#define replaceAll(O, N)                       \
    n = 0;                                     \
    m = strlen(O);                             \
    l = strlen(N);                             \
    while ((n = s.find(O, n)) != string::npos) \
    {                                          \
        q = (p = s.begin() + n) + m;           \
        if (q != s.end() && (isAlpha(*q)))     \
        {                                      \
            ++n;                               \
            continue;                          \
        }                                      \
        s.replace(p, q, N);                    \
        n += l;                                \
    }
// Erase every occurrence of O that is not directly followed by a letter.
#define eraseAll(O)                            \
    n = 0;                                     \
    m = strlen(O);                             \
    while ((n = s.find(O, n)) != string::npos) \
    {                                          \
        q = (p = s.begin() + n) + m;           \
        if (q != s.end() && (isAlpha(*q)))     \
        {                                      \
            ++n;                               \
            continue;                          \
        }                                      \
        s.erase(p, q);                         \
    }
    replaceAll("\\N", "\\mathbb N")
    replaceAll("\\Z", "\\mathbb Z")
    replaceAll("\\Q", "\\mathbb Q")
    replaceAll("\\R", "\\mathbb R")
    replaceAll("\\C", "\\mathbb C")
    replaceAll("{\\rm", "\\mathrm{")
    replaceAll("{\\bf", "\\mathbf{")
    replaceAll("\\part", "\\partial")
    replaceAll("\\varlimsup", "\\overline{\\lim}")
    replaceAll("\\varliminf", "\\underline{\\lim}")
    replaceAll("\\sube", "\\subseteq")
    replaceAll("\\supe", "\\supseteq")
    replaceAll("\\infin", "\\infty")
    replaceAll("\\not\\to","\\nrightarrow")
    replaceAll("\\ang","\\angle")
    replaceAll("\\lang","\\langle")
    replaceAll("\\rang","\\rangle")
    // eraseAll("\\limits")
    eraseAll("\\tiny")
    eraseAll("\\scriptsize")
    eraseAll("\\footnotesize")
    eraseAll("\\normalsize")
    eraseAll("\\small")
    eraseAll("\\large")
    eraseAll("\\Large")
    eraseAll("\\LARGE")
    eraseAll("\\huge")
    eraseAll("\\Huge")
    // If pandoc still prints warnings about other commands, add more replacements here.

    // ---- Pass 4: split `$$` blocks at top-level `\\` ------------------------
    n = 0;
// p points at "\begin" / "\end" that is not followed by a letter.
#define IT_IS_BEGIN(p) *p == '\\' && *(p + 1) == 'b' && *(p + 2) == 'e' && *(p + 3) == 'g' && *(p + 4) == 'i' && *(p + 5) == 'n' && ((*(p + 6) < 'a' || *(p + 6) > 'z') && (*(p + 6) < 'A' || *(p + 6) > 'Z'))
#define IT_IS_END(p) *p == '\\' && *(p + 1) == 'e' && *(p + 2) == 'n' && *(p + 3) == 'd' && ((*(p + 4) < 'a' || *(p + 4) > 'z') && (*(p + 4) < 'A' || *(p + 4) > 'Z'))
// Given l = index of a "\begin", set d = index of its matching "\end"
// (or s.size() when the text ends first). When there is no "\begin"
// (l == npos), d becomes npos so that no position counts as inside one.
#define FIND_NEXT_END              \
    if (l == string::npos)         \
        d = string::npos;          \
    else                           \
    {                              \
        p = s.begin() + l;         \
        c = 1;                     \
        do                         \
        {                          \
            if (++p == s.end())    \
                break;             \
            if (IT_IS_BEGIN(p))    \
                ++c;               \
            else if (IT_IS_END(p)) \
                --c;               \
        } while (c);               \
        d = p - s.begin();         \
    }
// Shift a valid index by the 5 characters each `\\` replacement adds.
#define ADD_INDEX(x)       \
    if (x != string::npos) \
        x += 5;
    while ((n = s.find("$$\n", n)) != string::npos)
    {
        // Blank out the newline after the opening `$$` ...
        s[k = n += 2] = ' ';
        // ... and the one before the closing `$$`; stop when the block is unterminated.
        m = s.find("\n$$", n);
        if (m == string::npos)
            break;
        s[m] = ' ';
        l = s.find("\\begin", n);
        FIND_NEXT_END
        while ((k = s.find("\\\\", k)) < m)
        {
            // Advance [l, d] to the first environment that ends at or after k.
            while (k > d)
            {
                l = s.find("\\begin", d);
                FIND_NEXT_END
            }
            if (k > l)
            {
                // This `\\` lies inside a \begin ... \end pair: keep it.
                ++k;
                continue;
            }
            s.replace(k, 2, " $$\n$$ ");
            n = k += 7;
            ADD_INDEX(m)
            ADD_INDEX(l)
            ADD_INDEX(d)
        }
        n = m + 3;
    }

    // Comment out the next line when the input file is already UTF-8.
    gbk2utf8(s);
    FILE *f(fopen((output + ".md").c_str(), "w"));
    if (!f)
    {
        printf("文件打开失败!");
        return 1;
    }
    fprintf(f, "%s", s.c_str());
    fclose(f);
    system(("pandoc -i " + output + ".md -o " + output).c_str());
    return 0;
}