1、将excel另存为xml文档,
在office下:
wps下另存为:
2、采用libxml2解析xml数据:
xmlDocPtr doc;
xmlNodePtr cur;
//doc = xmlParseFile(&file_name[0]);
doc = xmlReadFile(&file_name[0], NULL,XML_PARSE_NOBLANKS);
if (doc == NULL ) {
fprintf(stderr,"Document not parsed successfully. \n");
return FALSE;
}
cur = xmlDocGetRootElement(doc);
if (cur == NULL) {
fprintf(stderr,"empty document\n");
xmlFreeDoc(doc);
return FALSE;
}
//文件根元素为Workbook
if (xmlStrcmp(cur->name, (const xmlChar *) "Workbook")) {
fprintf(stderr,"document of the wrong type, root node != story");
xmlFreeDoc(doc);
return FALSE;
}
//找到Worksheet子节点
cur = cur->xmlChildrenNode;
while (cur != NULL) {
if ((!xmlStrcmp(cur->name, (const xmlChar *)"Worksheet"))){
//parseStory (doc, cur);
break;
}
cur = cur->next;
}
//Worksheet的两次子节点到Table元素
cur = cur->children;
cur = cur->children;
if(!cur)
{
xmlFreeDoc(doc);
return FALSE;
}
//找到行开始元素
while(cur)
{
if (!xmlStrcmp(cur->name, (const xmlChar*)"Row"))
{
break;
}
//printf(" ");
//printf((char*)cur->name);
cur = cur->next;
}
//这里找到所要的行,作为例子,这里为第三行
int i;
for(i = 0; i < 3 && cur; i++)
{
cur = cur->next;
//printf(" ");
//printf((char*)cur->name);
}
int count = 0;
while(cur)
{
xmlNodePtr cell;
cell = cur->children;
//printf("\n");
if(!cell)
{
break;
}
//假设有8列
for(i = 0; i< 8; i++)
{
xmlNodePtr data;
xmlChar* content;
xmlChar* index;
if (!cell)
{
break;
}
//有数据的列起始位置,也就是前面的cell里如何没有数据,那么是无需存储的
index = xmlGetProp(cell, (const xmlChar*)"Index");
if(index)
{
i = atoi((char*)index)-1;
xmlFree(index);
}
data = cell->children;
if(data)
{
//处理相关数据
content = xmlNodeGetContent(cell);
switch(i)
{
case 0:
break;
case 1:
break;
case 2:
break;
case 3:
break;
case 4:
break;
case 5:
break;
case 6:
break;
case 7:
break;
case 8:
break;
default:
break;
}
}
else
{
//一整行没有数据,认为已经读完,当然也可以让程序循环到row的兄弟节点为NULL,则为结束
if(i == 0)
{
file_finished = 1;
break;
}
}
cell = cell->next;
}
if(file_finished)
{
break;
}
cur = cur->next;
count++;
}
//释放内存
xmlFreeDoc(doc);
3、excel文件格式说明:
第一部分:文件说明。包括作者、创建日期、修改日期、xml名空间等。
<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
<Author>Administrator</Author>
<Created>2016-11-16T06:31:00Z</Created>
<LastSaved>2016-11-29T14:31:27Z</LastSaved>
</DocumentProperties>
<CustomDocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
<KSOProductBuildVer dt:dt="string">2052-10.1.0.6065</KSOProductBuildVer>
</CustomDocumentProperties>
<ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel">
<WindowWidth>19095</WindowWidth>
<WindowHeight>8850</WindowHeight>
<ProtectStructure>False</ProtectStructure>
<ProtectWindows>False</ProtectWindows>
</ExcelWorkbook>
第二部分:excel的单元格格式说明
<Styles>
<Style ss:ID="s33" ss:Name="20% - 强调文字颜色 5">
<Font ss:FontName="宋体" x:CharSet="0" ss:Size="11" ss:Color="#000000"/>
<Interior ss:Color="#D9E1F2" ss:Pattern="Solid"/>
</Style>
<Style ss:ID="s31" ss:Name="好">
<Font ss:FontName="宋体" x:CharSet="0" ss:Size="11" ss:Color="#006100"/>
<Interior ss:Color="#C6EFCE" ss:Pattern="Solid"/>
</Style>
<Style ss:ID="s2" ss:Name="20% - 强调文字颜色 3">
<Font ss:FontName="宋体" x:CharSet="0" ss:Size="11" ss:Color="#000000"/>
<Interior ss:Color="#EDEDED" ss:Pattern="Solid"/>
</Style>
<Style ss:ID="s8" ss:Name="千位分隔">
<NumberFormat ss:Format="_ * #,##0.00_ ;_ * \-#,##0.00_ ;_ * "-"??_ ;_ @_ "/>
</Style>
<Style ss:ID="s44" ss:Name="40% - 强调文字颜色 5">
<Font ss:FontName="宋体" x:CharSet="0" ss:Size="11" ss:Color="#000000"/>
<Interior ss:Color="#B4C6E7" ss:Pattern="Solid"/>
</Style>
<Style ss:ID="s18" ss:Name="解释性文本">
<Font ss:FontName="宋体" x:CharSet="0" ss:Size="11" ss:Color="#7F7F7F" ss:Italic="1"/>
</Style>
<Styles> 这里仅为举例,其实格式说明很多,包括单元字体、边框、字体颜色等等。 第三部分:单元格数据,首先是表格列属性说明,紧接着是每一行的数据,这里仅列举部分。 <Worksheet ss:Name="Sheet1"> <Table ss:ExpandedColumnCount="254" ss:ExpandedRowCount="67" x:FullColumns="1" x:FullRows="1" ss:StyleID="s53" ss:DefaultColumnWidth="54" ss:DefaultRowHeight="13.5"> <Column ss:Index="1" ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="45"/> <Column ss:Index="2" ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="53.25"/> <Column ss:StyleID="s50" ss:AutoFitWidth="0" ss:Width="31.5"/> <Column ss:StyleID="s50" ss:AutoFitWidth="0" ss:Width="33.75"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="30"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="30.75"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="33"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="32.25"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="36.75"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="35.25"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="33.75"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="35.25"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="30.75"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="54.75"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="27" ss:Span="1"/> <Column ss:Index="17" ss:StyleID="s50" ss:AutoFitWidth="0" ss:Width="33"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="29.25"/> <Column ss:StyleID="s53" ss:AutoFitWidth="0" ss:Width="44.25"/> <Column ss:StyleID="s54" ss:AutoFitWidth="0" ss:Width="200.25"/> <Column ss:StyleID="s50" ss:Span="233"/> <Row ss:StyleID="s50" ss:Height="41.1"> <Cell ss:StyleID="s94" ss:MergeAcross="19"> <Data ss:Type="String"> this is cell data</Data> </Cell> </Row> <Row ss:StyleID="s51" ss:Height="33.95"> <Cell ss:Index="5" ss:StyleID="s63"> ss:StyleID <Data ss:Type="String">just</Data> </Cell> <Cell ss:StyleID="s60"> <Data ss:Type="String"> shi </Data> </Cell> </Row> //从第5个单元格开始才有数据,ss:名空间,单元格格式:编号为s63的 好了,介绍到这,仅抛砖引玉,如有疏漏之处,欢迎指正,谢谢!