1、最外层为zip. 可包含多个文件夹。
2、每个文件夹中的文件可以为 .gz、.zip、.xml。其中 gz 和 zip 为 xml 的压缩文件。
3、输出读取的xml文件
/**
 * Reads XML documents stored inside a zip archive and prints the table rows
 * extracted from them.
 *
 * <p>Every non-directory, non-empty archive entry whose name contains
 * {@code M2000} is opened according to its extension: {@code .gz} is
 * gunzipped, {@code .zip} is opened as a nested archive (its first file entry
 * is used) and {@code .xml} is read directly. Extensions are matched without
 * regard to case.
 */
public class readXML {

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the archive to read.
     *             When absent, {@code E:/oldbak.zip} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : "E:/oldbak.zip";
        // try-with-resources closes the archive even when an entry fails to read.
        try (ZipFile zf = new ZipFile(path, Charset.forName("gbk"))) {
            Enumeration<? extends ZipEntry> entries = zf.entries();
            while (entries.hasMoreElements()) {
                ZipEntry ze = entries.nextElement();
                // Skip directories and empty files.
                if (ze.isDirectory() || ze.getSize() <= 0) {
                    continue;
                }
                String name = ze.getName();
                // Only entries whose name contains the keyword are processed.
                if (!name.contains("M2000")) {
                    continue;
                }
                // A null resource is legal in try-with-resources and is simply not closed.
                try (InputStream input = openXmlStream(zf, ze)) {
                    HashMap<String, String> map = decode(input);
                    if (map == null) {
                        // Unsupported extension or unparsable document: report and move on
                        // instead of failing with a NullPointerException.
                        System.err.println("Skipped " + name
                                + ": unsupported file type or unreadable XML");
                        continue;
                    }
                    for (Entry<String, String> entry : map.entrySet()) {
                        System.out.println(entry.getKey() + ":" + entry.getValue());
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the XML payload of an archive entry based on the entry's extension.
     *
     * @return a stream positioned at the XML data, or {@code null} when the
     *         extension is not supported or a nested zip contains no file entry
     * @throws IOException if the entry cannot be opened or decompressed
     */
    private static InputStream openXmlStream(ZipFile zf, ZipEntry ze) throws IOException {
        String name = ze.getName();
        if (endsWithIgnoreCase(name, ".gz")) {
            // If the gzip header is invalid the raw stream is released when zf is closed.
            return new GZIPInputStream(zf.getInputStream(ze));
        }
        if (endsWithIgnoreCase(name, ".zip")) {
            ZipInputStream nested = new ZipInputStream(zf.getInputStream(ze));
            boolean positioned = false;
            try {
                ZipEntry inner;
                // Advance to the first real file; directory entries carry no data.
                while ((inner = nested.getNextEntry()) != null) {
                    if (!inner.isDirectory()) {
                        positioned = true;
                        return nested;
                    }
                }
                return null;
            } finally {
                if (!positioned) {
                    nested.close();
                }
            }
        }
        if (endsWithIgnoreCase(name, ".xml")) {
            return zf.getInputStream(ze);
        }
        return null;
    }

    /** Case-insensitive variant of {@link String#endsWith(String)}. */
    private static boolean endsWithIgnoreCase(String text, String suffix) {
        int offset = text.length() - suffix.length();
        return offset >= 0 && text.regionMatches(true, offset, suffix, 0, suffix.length());
    }

    /**
     * Parses an XML document and collects one comma-separated row per table element.
     *
     * <p>The traversal is: root, then each child named {@code syndata}, then each
     * of its children named {@code class}, then every child of that element (a
     * "table"). For each table only its FIRST child element is read; the children
     * of that element are the columns. A column without child elements contributes
     * its own text; a column with child elements contributes the text of the first
     * child of each of those elements, joined with {@code /}.
     *
     * <p>The map key is the table element name, so a later table with the same
     * name replaces an earlier one.
     *
     * <p>NOTE(review): the SAXReader is used with its default configuration;
     * if the archives can come from an untrusted source, DTD/external-entity
     * processing should be disabled - confirm.
     *
     * @param input stream holding the XML document; may be {@code null}
     * @return table name to row value, or {@code null} when {@code input} is
     *         {@code null}, the document cannot be parsed, or it has no root element
     */
    public static HashMap<String, String> decode(InputStream input) {
        if (input == null) {
            return null;
        }
        Document doc;
        try {
            SAXReader reader = new SAXReader();
            reader.setEncoding("GBK");
            doc = reader.read(input);
        } catch (DocumentException e) {
            e.printStackTrace();
            return null;
        }
        if (doc == null) {
            return null;
        }
        Element root = doc.getRootElement();
        if (root == null) {
            return null;
        }

        HashMap<String, String> hash = new HashMap<String, String>();
        for (Iterator<?> syn = root.elementIterator(); syn.hasNext();) {
            Element syndata = (Element) syn.next();
            if (syndata == null || !"syndata".equals(syndata.getName())) {
                continue;
            }
            for (Iterator<?> i = syndata.elementIterator(); i.hasNext();) {
                // Rest briefly before handling each child so parsing does not hog the CPU.
                pause();
                Element cls = (Element) i.next();
                if (!"class".equals(cls.getName())) {
                    continue;
                }
                for (Iterator<?> j = cls.elementIterator(); j.hasNext();) {
                    collectRow((Element) j.next(), hash);
                }
            }
        }
        return hash;
    }

    /** Sleeps for one millisecond, preserving the interrupt status if interrupted. */
    private static void pause() {
        try {
            Thread.sleep(1);
        } catch (InterruptedException e) {
            // Restore the flag so code further up the stack can react to the interrupt.
            Thread.currentThread().interrupt();
        }
    }

    /** Builds the header and value row for one table element and stores the row in {@code hash}. */
    private static void collectRow(Element table, HashMap<String, String> hash) {
        String tablename = table.getName();
        Iterator<?> k = table.elementIterator();
        if (!k.hasNext()) {
            return;
        }
        // Only the first child of the table element is read.
        Element attr = (Element) k.next();

        StringBuilder head = new StringBuilder();
        StringBuilder row = new StringBuilder();
        boolean first = true;
        for (Iterator<?> l = attr.elementIterator(); l.hasNext();) {
            Element node = (Element) l.next();
            if (!first) {
                head.append(',');
                row.append(',');
            }
            first = false;

            Iterator<?> n = node.elementIterator();
            if (!n.hasNext()) {
                // A header that is a reserved keyword must be renamed; so far only "desc" is known.
                String column = node.getName();
                if (column.equalsIgnoreCase("DESC")) {
                    column = column + "0";
                }
                head.append(column);
                row.append(toCell(node.getText()));
            } else {
                head.append(node.getName());
                row.append(toCell(joinNestedValues(n)));
            }
        }

        String tablehead = head.toString();
        String tableval = row.toString();
        // Safety net: the header and the row must describe the same number of columns.
        if (tablehead.split(",").length == tableval.split(",").length) {
            hash.put(tablename, tableval);
        } else {
            System.out.println(tablename + ":表头和表内容列数异常");
        }
    }

    /** Joins, with {@code /}, the text of the first child of every element produced by {@code items}. */
    private static String joinNestedValues(Iterator<?> items) {
        StringBuilder joined = new StringBuilder();
        boolean any = false;
        while (items.hasNext()) {
            Element item = (Element) items.next();
            Iterator<?> m = item.elementIterator();
            if (!m.hasNext()) {
                continue;
            }
            Element val = (Element) m.next();
            // Multiple values in one column are separated by "/".
            if (any) {
                joined.append('/');
            }
            joined.append(val.getText());
            any = true;
        }
        return joined.toString();
    }

    /**
     * Normalises a value for the comma-separated row: an empty value becomes a
     * single space so the column is not lost, and commas inside the data become
     * semicolons because the comma is the column separator.
     */
    private static String toCell(String text) {
        if (text == null || text.isEmpty()) {
            return " ";
        }
        return text.replace(",", ";");
    }
}