1 使用转换工具将《PPT转H5系列(六)----提取PPT中使用的字体文件》中PPT转换后的pdf文件转换成xml文件
转换工具:mudraw.exe
命令示例如下:./mudraw.exe -F stext "C:\Users\user\AppData\Local\Temp\HTML5Point\default.pdf"
转换得到的 xml 再用 tinyxml2 进行解析,得出每一页所使用的字体信息
void MuxmlReader::OpenFromString(const std::string& xml)
{
try
{
tinyxml2::XMLDocument doc;
if(doc.Parse(xml.c_str()) == tinyxml2::XML_SUCCESS)
{
tinyxml2::XMLElement* element = doc.RootElement();
this->m_document.DeSerial(element);
this->State = MuxmlReader::MuxmlReaderState_Loaded;
}
else
{
this->State = MuxmlReader::MuxmlReaderState_Invalid;
}
}
catch(...)
{
this->State = MuxmlReader::MuxmlReaderState_Invalid;
}
}
然后遍历节点,获取使用的字体信息并保存
void CPdfPresentation::OpenFromString(const std::string& strXmlString)
{
try
{
this->m_MuxmlReader.OpenFromString(strXmlString);
vector<documentPage> pages = this->m_MuxmlReader.m_document.get_page();
for (int i = 0; i < pages.size(); i++)
{
documentPage& page = pages[i];
A::CPdfXmlPageInfo pageinfo;
vector<documentPageBlock>& blocks = page.blockField;
for (int i = 0; i < blocks.size(); i++)
{
documentPageBlock& block = blocks[i];
vector<documentPageBlockLine>& lines = block.lineField;
if(lines.size() > 0)
{
for (int i = 0; i < lines.size(); i++)
{
documentPageBlockLine& line = lines[i];
if (line.spanField.size() > 0)
{
vector<documentPageBlockLineSpan>& spans = line.spanField;
for (int i = 0; i < spans.size(); i++)
{
documentPageBlockLineSpan& span = spans[i];
if (span.charField.size() > 0)
{
CPdfXmlFontInfo info;
info.FontSize = (float)span.get_size();
info.FontName = span.get_font();
info.LineSpan = span;
pageinfo.m_PdfXmlFontInfo_Vec.push_back(info);
}
}
}
}
}
}
this->m_PdfXmlPageInfo_Vec.push_back(pageinfo);
}
}
catch(...)
{
this->m_PdfXmlPageInfo_Vec.clear();
}
}
本项目还在开发中,已有能运行的程序,但还不完善,欢迎感兴趣的朋友私信交流
测试程序下载地址:
https://download.csdn.net/download/cwp0508/88958928?spm=1001.2014.3001.5503