先将 Word 文档转换为 XML,再根据 XML 的标签解析内容,从而提取出 Word 文档中的文字和图片。
/**
 * Extracts the picture that sits in a marked table cell of a Word document which has
 * been saved as a single XML file, and writes that picture to disk.
 *
 * <p>The lookup chain is: the {@code w:tc} cell whose text contains the
 * {@code #networkTopo#} marker, then the first {@code v:imagedata} element inside it,
 * then its {@code r:id} attribute, then the {@code Relationship} element carrying that
 * id, then the {@code pkg:part} element named {@code /word/media/<file>}, and finally
 * the Base64 text of its {@code binaryData} child.
 *
 * <p>NOTE(review): the element names suggest Word's single-file XML package format;
 * confirm against the documents this tool is actually fed.
 *
 * <p>Instances are stateful (the search methods publish their result through
 * {@link #resultNode} and {@link #isFound}) and are not designed for concurrent use.
 */
public class ReadWord2
{
    /** Input file used by the no-argument {@link #readTable()}. */
    private static final String DEFAULT_XML_PATH = "D:\\temp\\target1.xml";

    /** File the extracted picture is written to. */
    private static final String OUTPUT_IMAGE_PATH = "d:\\temp\\pp.png";

    /** ImageIO format name used for the output; kept in sync with the file extension. */
    private static final String OUTPUT_IMAGE_FORMAT = "png";

    /** Prefix of the package part names that hold embedded pictures. */
    private static final String MEDIA_PART_PREFIX = "/word/media/";

    /** Qualified name of the attribute that links a picture to its relationship entry. */
    private static final String REL_ID_ATTRIBUTE = "r:id";

    /** Element found by the most recent successful search; untouched by a failed search. */
    Element resultNode = null;

    /** Whether the most recent search found a match. */
    boolean isFound = false;

    /** Relationship id of the picture (for example {@code rId8}). */
    String topoPicId = "";

    /** File name of the picture inside the package (the last segment of the target). */
    String topoPicName = "";

    /** Name of the package part holding the picture; the bare prefix until one is resolved. */
    String imgPicName = MEDIA_PART_PREFIX;

    /** Base64 text of the picture as read from the document. */
    String topoPicStr = "";

    public static void main(String[] args)
    {
        ReadWord2 rw = new ReadWord2();
        rw.readTable();
    }

    /**
     * Processes the default input file; see {@link #readTable(String)}.
     */
    void readTable()
    {
        readTable(DEFAULT_XML_PATH);
    }

    /**
     * Parses the given XML file and exports the picture found in the marked table cell.
     * Parse failures are reported on standard error and otherwise ignored, as before.
     *
     * @param filePath path of the XML file to read
     */
    void readTable(String filePath)
    {
        try
        {
            SAXReader sax = new SAXReader();
            // read() throws DocumentException when the file cannot be parsed.
            Document document = sax.read(new File(filePath));
            exportPlaceholderImage(document.getRootElement());
        }
        catch (DocumentException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Locates the marked cell and its picture reference, then exports the picture.
     * Returns silently as soon as one link of the lookup chain is missing.
     */
    private void exportPlaceholderImage(Element root)
    {
        getNodesByNameAndText(root, "w:tc", "#networkTopo#");
        if (!isFound)
        {
            return;
        }

        // An empty search text matches any element, so this finds the first v:imagedata.
        getNodesByNameAndText(resultNode, "v:imagedata", "");
        if (!isFound)
        {
            return; // the marked cell holds no picture
        }

        String relationshipId = findAttributeValue(resultNode, REL_ID_ATTRIBUTE);
        if (relationshipId == null)
        {
            return;
        }

        topoPicId = relationshipId;
        exportRelatedImage(root, relationshipId);
        System.out.println("属性名称:" + REL_ID_ATTRIBUTE + "属性值:" + relationshipId);
    }

    /**
     * Follows a relationship id to the package part holding the picture and writes it out.
     */
    private void exportRelatedImage(Element root, String relationshipId)
    {
        getNodesByNameAndAttr(root, "Relationship", relationshipId);
        if (!isFound)
        {
            return;
        }

        Attribute targetAttr = resultNode.attribute("Target");
        if (targetAttr == null)
        {
            return;
        }

        // Keep only the file name: works for "media/x.png", "/word/media/x.png" and "x.png".
        String target = targetAttr.getValue();
        topoPicName = target.substring(target.lastIndexOf('/') + 1);

        // Build from the constant prefix so repeated runs do not keep appending to the field.
        imgPicName = MEDIA_PART_PREFIX + topoPicName;
        getNodesByNameAndAttr(root, "pkg:part", imgPicName);
        if (!isFound)
        {
            return;
        }

        Element binaryData = resultNode.element("binaryData");
        if (binaryData == null)
        {
            return;
        }

        topoPicStr = binaryData.getText();
        try
        {
            base64toImg(topoPicStr);
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Returns the value of the attribute with the given qualified name.
     *
     * @return the attribute value, or {@code null} when the element has no such attribute
     */
    private static String findAttributeValue(Element element, String qualifiedName)
    {
        List<Attribute> attributes = element.attributes();
        for (Attribute attribute : attributes)
        {
            if (qualifiedName.equals(attribute.getQualifiedName()))
            {
                return attribute.getValue();
            }
        }
        return null;
    }

    /**
     * Searches {@code node} and its descendants, depth first, for the first element whose
     * qualified name is {@code nodeName} and whose string value contains {@code nodeText}.
     *
     * <p>The outcome is published through the fields: {@link #isFound} is set to whether
     * a match exists, and {@link #resultNode} is updated only when one does.
     *
     * @param node     element to start from
     * @param nodeName qualified element name to look for (for example {@code w:tc})
     * @param nodeText text the element must contain; an empty string matches any element
     * @author chenleixing
     */
    public void getNodesByNameAndText(Element node, String nodeName, String nodeText)
    {
        Element match = findByNameAndText(node, nodeName, nodeText);
        isFound = match != null;
        if (match != null)
        {
            resultNode = match;
        }
    }

    /**
     * Searches {@code node} and its descendants, depth first, for the first element whose
     * qualified name is {@code nodeName} and which has any attribute whose value equals
     * {@code attributeValue}.
     *
     * <p>The outcome is published through the fields: {@link #isFound} is set to whether
     * a match exists, and {@link #resultNode} is updated only when one does.
     *
     * @param node           element to start from
     * @param nodeName       qualified element name to look for (for example {@code pkg:part})
     * @param attributeValue value that one of the element's attributes must equal
     */
    public void getNodesByNameAndAttr(Element node, String nodeName, String attributeValue)
    {
        Element match = findByNameAndAttr(node, nodeName, attributeValue);
        isFound = match != null;
        if (match != null)
        {
            resultNode = match;
        }
    }

    /** Recursive worker for {@link #getNodesByNameAndText}; returns the match or null. */
    private static Element findByNameAndText(Element node, String nodeName, String nodeText)
    {
        if (node == null)
        {
            return null;
        }
        if (nodeName.equals(node.getQualifiedName()))
        {
            // As in the original, a same-named element that does not match is not
            // descended into (its string value is expected to include descendant text).
            return node.getStringValue().contains(nodeText) ? node : null;
        }

        List<Element> children = node.elements();
        for (Element child : children)
        {
            Element match = findByNameAndText(child, nodeName, nodeText);
            if (match != null)
            {
                return match;
            }
        }
        return null;
    }

    /** Recursive worker for {@link #getNodesByNameAndAttr}; returns the match or null. */
    private static Element findByNameAndAttr(Element node, String nodeName, String attributeValue)
    {
        if (node == null)
        {
            return null;
        }
        if (nodeName.equals(node.getQualifiedName()))
        {
            List<Attribute> attributes = node.attributes();
            for (Attribute attribute : attributes)
            {
                String value = attribute.getValue();
                if (value != null && value.equals(attributeValue))
                {
                    return node;
                }
            }
        }

        // Keep descending even below a same-named element that did not match, so a
        // nested element of the same name can still be found.
        List<Element> children = node.elements();
        for (Element child : children)
        {
            Element match = findByNameAndAttr(child, nodeName, attributeValue);
            if (match != null)
            {
                return match;
            }
        }
        return null;
    }

    /**
     * Treats the raw bytes of {@code imgStr} as image data and writes the picture out.
     *
     * <p>NOTE(review): no caller is visible in this class, and {@code getBytes()} uses the
     * platform default charset, so the bytes depend on the machine; confirm whether this
     * method is still needed.
     *
     * @throws IOException if the bytes are not a readable image or the file cannot be written
     */
    private void convertStrToImg(String imgStr)
        throws IOException
    {
        writeImage(imgStr.getBytes());
    }

    /**
     * Decodes Base64 image data and writes the picture out.
     *
     * @param imgStr Base64 text; line breaks inside the text are tolerated
     * @throws IOException if the text is missing or not valid Base64, the decoded bytes
     *                     are not a readable image, or the file cannot be written
     */
    private void base64toImg(String imgStr)
        throws IOException
    {
        if (imgStr == null)
        {
            throw new IOException("No Base64 image data to decode");
        }

        byte[] imgByte;
        try
        {
            // Fully qualified so the file needs no extra import. The MIME decoder is
            // used because it skips the line separators found in long Base64 text.
            imgByte = java.util.Base64.getMimeDecoder().decode(imgStr);
        }
        catch (IllegalArgumentException e)
        {
            throw new IOException("Embedded image data is not valid Base64", e);
        }
        writeImage(imgByte);
    }

    /**
     * Decodes the image bytes and re-encodes them into the fixed output file.
     *
     * @throws IOException if no installed reader understands the bytes, no writer accepts
     *                     the image, or the file cannot be written
     */
    private void writeImage(byte[] imgByte)
        throws IOException
    {
        try (ByteArrayInputStream bais = new ByteArrayInputStream(imgByte))
        {
            BufferedImage image = ImageIO.read(bais);
            if (image == null)
            {
                // ImageIO.read returns null instead of throwing when no reader matches.
                throw new IOException("Unsupported or corrupt image data");
            }

            File output = new File(OUTPUT_IMAGE_PATH);
            // ImageIO.write reports "no suitable writer" through its return value.
            if (!ImageIO.write(image, OUTPUT_IMAGE_FORMAT, output))
            {
                throw new IOException("No " + OUTPUT_IMAGE_FORMAT + " writer available for " + output);
            }
        }
    }
}