最近在用 pandoc 将 markdown 转换为 word 时发现,如果文档中存在 \def 命令,pandoc 不会自动展开替换,导致导出的 word 中大量数学公式无法转换。因此我写了如下 C++ 程序,可以预处理 markdown 文件并替换一些 pandoc 无法识别的命令。(我写的 markdown 文件为 GBK 编码;如果是 UTF-8 编码,请注释掉 main 函数中调用 gbk2utf8(s); 的那一行。)
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <windows.h>
using namespace std;
// Loads the whole file at path `p` into `s`.
// Returns true when the file could NOT be opened (in which case `s` is left
// untouched) and false on success -- note the inverted convention.
bool file2str(string &s, const char *p)
{
    ifstream source(p);
    const bool failed = !source.is_open();
    if (!failed)
    {
        // Drain the stream buffer in one go; an empty file simply yields "".
        ostringstream contents;
        contents << source.rdbuf();
        s = contents.str();
    }
    return failed;
}
// Re-encodes `strGBK` in place from the system ANSI code page (CP_ACP) to UTF-8.
//
// NOTE(review): CP_ACP is whatever the machine's ANSI code page is; it is
// presumably GBK (936) on the author's Simplified-Chinese Windows -- confirm
// before running on a machine with a different locale.
//
// Only the text up to the first NUL byte is converted, because both API calls
// are given a length of -1 (NUL-terminated input). If either conversion fails
// the string is left unchanged instead of being replaced by garbage.
void gbk2utf8(string &strGBK)
{
    // First call only asks for the required size (in wchar_t, terminator included).
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, nullptr, 0);
    if (wideLen <= 0)
        return;
    std::wstring wide(static_cast<std::size_t>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, &wide[0], wideLen) <= 0)
        return;

    // Same two-step dance for UTF-16 -> UTF-8 (size in bytes, terminator included).
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, nullptr, 0, nullptr, nullptr);
    if (utf8Len <= 0)
        return;
    std::string utf8(static_cast<std::size_t>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, nullptr, nullptr) <= 0)
        return;

    utf8.resize(static_cast<std::size_t>(utf8Len) - 1); // drop the terminator the API wrote
    strGBK.swap(utf8);
}
// True when `c` is an ASCII letter (a-z or A-Z). Bytes outside ASCII, such as
// the bytes of a multi-byte character, are never letters.
// A real function rather than a macro, so it composes safely with `!`, `&&`
// and `||` and evaluates its argument exactly once.
constexpr bool isAlpha(char c) noexcept
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
// Logical negation of isAlpha: true when `c` is NOT an ASCII letter.
constexpr bool isnAlpha(char c) noexcept
{
    return !isAlpha(c);
}
int main()
{
string s, s1, *C, *D, input, output;
cout << "请输入markdown文件路径:";
cin >> input;
cout << "请输入word文件路径:";
cin >> output;
if (file2str(s, input.c_str()))
{
printf("文件打开失败!");
return 1;
}
size_t n, m, k(0), l, d;
uint8_t c, t, a;
char A[5];
string::iterator p, q, r;
while ((k = s.find("\\def", k)) != string::npos)
{
if (*(q = (r = s.begin() + k) + 4) != '\\')
{
++k;
continue;
}
++(p = q);
while (isAlpha(*p))
++p;
string O(q, p);
c = 0;
while (*p != '{')
{
if (*p == '#')
++c;
++p;
}
q = p;
t = 1;
do
if (*++q == '{')
++t;
else if (*q == '}')
--t;
while (t);
string N(++p, q);
s.erase(r, ++q);
if (c)
{
s1 = "";
D = C = new string[c];
while ((n = s.find(O)) != string::npos)
{
#define BACKSLASH_IS_BEGIN !((n - (p - s.begin())) & 1) || p == s.begin() && *p == '\\'
#define JUDGE_BACKSLASH \
p = s.begin() + n; \
while (p != s.begin()) \
if (*--p != '\\') \
break; \
if (BACKSLASH_IS_BEGIN) \
{ \
++n; \
continue; \
}
JUDGE_BACKSLASH
m = O.length();
s1.append(s.begin(), s.begin() + n);
s.erase(0, n);
p = s.begin() + m;
if (isAlpha(*p))
{
s1.push_back(s.front());
s.erase(s.begin());
continue;
}
if (*p == '{')
{
q = p;
a = 1;
do
if (*++p == '{')
++a;
else if (*p == '}')
--a;
while (a);
*C = string(q, p + 1);
}
else if (*p == '\\')
{
q = p;
++p;
while (isAlpha(*p))
++p;
*C = '{' + string(q, p) + '}';
}
else
{
if (*p == ' ')
++p;
sprintf(A, "{%c}", *p);
*C = A;
}
t = c;
while (--t)
if (*++p == '{')
{
q = p;
a = 1;
do
if (*++p == '{')
++a;
else if (*p == '}')
--a;
while (a);
*++D = string(q, p + 1);
}
else if (*p == '\\')
{
q = p;
++p;
while (isAlpha(*p))
++p;
*C = '{' + string(q, p) + '}';
}
else
{
sprintf(A, "{%c}", *p);
*++D = A;
}
++(m = p - s.begin());
string N1(N);
++D;
do
{
sprintf(A, "#%d", D - C);
--D;
n = 0;
l = D->length();
while ((n = N1.find(A, n)) != string::npos)
{
N1.replace(n, strlen(A), *D);
n += l;
}
} while (D != C);
s1.append(N1);
s.erase(0, m);
}
delete[] C;
s = s1 + s;
}
else
{
n = 0;
m = O.length();
l = N.length();
while ((n = s.find(O, n)) != string::npos)
{
JUDGE_BACKSLASH
q = (p = s.begin() + n) + m;
if (q != s.end() && (isAlpha(*q)))
{
++n;
continue;
}
s.replace(p, q, N);
n += l;
}
}
}
n = 0;
#define NEXT_CIRC \
{ \
++n; \
continue; \
}
while ((n = s.find("\\limits", n)) != string::npos)
{
q = (p = s.begin() + n) + 7;
if (q != s.end() && (isAlpha(*q)))
NEXT_CIRC
r = p;
while (r != s.begin())
{
--r;
if (isnAlpha(*r))
break;
}
if (*r != '\\')
NEXT_CIRC
s1 = string(r, p);
c = *q;
if (c != '_' && c != '^')
NEXT_CIRC
F_limits:
p = ++q;
if (*p == '{')
{
t = 1;
do
if (*++q == '}')
--t;
else if (*q == '{')
++t;
while (t);
s1 = string(p, ++q) + '{' + s1 + '}';
}
else if(*p=='\\')
{
++q;
while(isAlpha(*q))
++q;
s1 = '{' + string(p, q) + "}{" + s1 + '}';
}
else
{
sprintf(A, "{%c}{", *p);
s1 = A + s1 + '}';
++q;
}
s1 = (c == '_' ? "\\underset" : "\\overset") + s1;
c = *q;
if (c != '_' && c != '^')
{
s.replace(r, q, s1);
continue;
}
goto F_limits;
}
#undef NEXT_CIRC
#define replaceAll(O, N) \
n = 0; \
m = strlen(O); \
l = strlen(N); \
while ((n = s.find(O, n)) != string::npos) \
{ \
q = (p = s.begin() + n) + m; \
if (q != s.end() && (isAlpha(*q))) \
{ \
++n; \
continue; \
} \
s.replace(p, q, N); \
n += l; \
}
#define eraseAll(O) \
n = 0; \
m = strlen(O); \
while ((n = s.find(O, n)) != string::npos) \
{ \
q = (p = s.begin() + n) + m; \
if (q != s.end() && (isAlpha(*q))) \
{ \
++n; \
continue; \
} \
s.erase(p, q); \
}
/*#define replaceAll(O, N) \
n = 0; \
m = strlen(O); \
l = strlen(N); \
while ((n = s.find(O, n)) != string::npos) \
{ \
s.replace(n, m, N); \
n += l; \
}
#define eraseAll(O) \
n = 0; \
m = strlen(O); \
while ((n = s.find(O, n)) != string::npos) \
s.erase(n, m);*/
replaceAll("\\N", "\\mathbb N")
replaceAll("\\Z", "\\mathbb Z")
replaceAll("\\Q", "\\mathbb Q")
replaceAll("\\R", "\\mathbb R")
replaceAll("\\C", "\\mathbb C")
replaceAll("{\\rm", "\\mathrm{")
replaceAll("{\\bf", "\\mathbf{")
replaceAll("\\part", "\\partial")
replaceAll("\\varlimsup", "\\overline{\\lim}")
replaceAll("\\varliminf", "\\underline{\\lim}")
replaceAll("\\sube", "\\subseteq")
replaceAll("\\supe", "\\supseteq")
replaceAll("\\infin", "\\infty")
replaceAll("\\not\\to","\\nrightarrow")
replaceAll("\\ang","\\angle")
replaceAll("\\lang","\\langle")
replaceAll("\\rang","\\rangle")
// eraseAll("\\limits")
eraseAll("\\tiny")
eraseAll("\\scriptsize")
eraseAll("\\footnotesize")
eraseAll("\\normalsize")
eraseAll("\\small")
eraseAll("\\large")
eraseAll("\\Large")
eraseAll("\\LARGE")
eraseAll("\\huge")
eraseAll("\\Huge")
// 如果还有别的警告信息,请在此补充替换。
n = 0;
#define IT_IS_BEGIN(p) *p == '\\' && *(p + 1) == 'b' && *(p + 2) == 'e' && *(p + 3) == 'g' && *(p + 4) == 'i' && *(p + 5) == 'n' && ((*(p + 6) < 'a' || *(p + 6) > 'z') && (*(p + 6) < 'A' || *(p + 6) > 'Z'))
#define IT_IS_END(p) *p == '\\' && *(p + 1) == 'e' && *(p + 2) == 'n' && *(p + 3) == 'd' && ((*(p + 4) < 'a' || *(p + 4) > 'z') && (*(p + 4) < 'A' || *(p + 4) > 'Z'))
#define FIND_NEXT_END \
p = s.begin() + l; \
c = 1; \
do \
{ \
if (++p == s.end()) \
break; \
if (IT_IS_BEGIN(p)) \
++c; \
else if (IT_IS_END(p)) \
--c; \
} while (c); \
d = p - s.begin();
#define ADD_INDEX(x) \
if (x != string::npos) \
x += 5;
while ((n = s.find("$$\n", n)) != string::npos)
{
s[k = n += 2] = ' ';
s[m = s.find("\n$$", n)] = ' ';
l = s.find("\\begin", n);
FIND_NEXT_END
while ((k = s.find("\\\\", k)) < m)
{
while (k > d)
{
l = s.find("\\begin", d);
FIND_NEXT_END
}
if (k > l)
{
++k;
continue;
}
s.replace(k, 2, " $$\n$$ ");
n = k += 7;
ADD_INDEX(m)
ADD_INDEX(l)
ADD_INDEX(d)
}
n = m + 3;
}
gbk2utf8(s);
FILE *f(fopen((output + ".md").c_str(), "w"));
fprintf(f, "%s", s.c_str());
fclose(f);
system(("pandoc -i " + output + ".md -o " + output).c_str());
return 0;
}