Dom4j解析xml文件并导出到csv中

最新推荐文章于 2021-06-22 14:49:18 发布

señoritaw

最新推荐文章于 2021-06-22 14:49:18 发布

阅读量663

点赞数 2

文章标签： xml

本文链接：https://blog.csdn.net/a_rain2333/article/details/85161947

版权

DOM4J解析

　　　 特征：1、JDOM的一种智能分支，它合并了许多超出基本XML文档表示的功能。

　　　　　　 2、它使用接口和抽象基本类方法。

　　　　　　 3、具有性能优异、灵活性好、功能强大和极端易用的特点。

　　　　　　 4、是一个开放源码的软件（类库）

　xml文件：

<bulkPmMrDataFile>
    <fileHeader Attribute1 = "Value1"  Attribute2 = "Value2"  Attribute3 = "Value3" >  </fileHeader>
    <eNB id="123456">
        <measurement>
            <smr> AA AB AC AD AE AF AG AH </smr>
            <object Attr1="val1" Attr2 = "val2" Attr3 ="val3">
                <v> a b c d e f g h </v>
            </object>
            <object Attr1="val1" Attr2 = "val2" Attr3 ="val3">
                <v> c d e d e f g h </v>
                <v> e f g h a b c d </v>
            </object>
        </measurement>
        <measurement>
            <smr> aa ab ac ad ae af ag ah av </smr>
            <object Attr1="val1" Attr2 = "val2" Attr3 ="val3">
                <v> a b c d e f g h </v>
            </object>
        </measurement>
    </eNB>
</bulkPmMrDataFile>

需求：解析xml文件并导出为csv格式：fileHeader标签的属性名作为表头、属性值作为对应的列值；smr标签中的字段名作为表头，v标签中的值作为对应的列值

Header是所有表头的实体类

主要代码：

public class XmlImportCsv {
    /** Maximum number of data rows written to one CSV file before rolling over to the next file. */
    private static final int MAX_ROWS_PER_CSV = 1000000;

    /** Bean property names exported as CSV columns, in column order; they double as the header titles. */
    private static final String[] CSV_FIELDS = {"Attribute1", "Attribute2", "Attribute3",
            "AA", "AB", "AC", "AD", "AE", "AF", "AG", "AH"};

    /**
     * Parses the XML file at {@code fileUrl} and exports its rows to CSV.
     *
     * <p>One {@link Header} row is produced for every child element of each {@code object}
     * element selected by the XPath below (only {@code measurement[1]} is selected). The three
     * {@code fileHeader} attributes are copied onto every row. Rows are written to files named
     * {@code MRO_<eNB id>_<n>.csv}, starting at {@code n = 1} and rolling over to a new file
     * every {@link #MAX_ROWS_PER_CSV} rows. If no row is produced, a header-only file is written.
     *
     * <p>If the file does not exist nothing is exported. Parsing and export errors are printed
     * and not rethrown.
     *
     * @param fileUrl path of the XML file to parse
     */
    public static  void loadXmlImportCSV(String fileUrl){
        File xmlFile = new File(fileUrl);
        if (!xmlFile.exists()) {
            System.err.println("XML file not found, nothing exported: " + fileUrl);
            return;
        }
        try {
            Document document = new SAXReader().read(xmlFile);
            Element root = document.getRootElement();

            // These values are identical for every row, so read them once instead of per row.
            Element fileHeader = root.element("fileHeader");
            String attribute1 = fileHeader.attributeValue("Attribute1");
            String attribute2 = fileHeader.attributeValue("Attribute2");
            String attribute3 = fileHeader.attributeValue("Attribute3");

            // CSV files are named after the eNB id of the XML file.
            String csvPrefix = "MRO" + "_" + root.element("eNB").attributeValue("id") + "_";

            // All object elements below the first measurement element.
            List<Element> objectList = document.selectNodes("//bulkPmMrDataFile//eNB//measurement[1]//object");

            List<Header> headerList = new ArrayList<Header>();
            int cvsNum = 1; // index of the CSV file currently being filled
            for (Element object : objectList) {
                Iterator<Element> valueIterator = object.elementIterator();
                while (valueIterator.hasNext()) {
                    headerList.add(toHeader(valueIterator.next(), attribute1, attribute2, attribute3));
                    if (headerList.size() >= MAX_ROWS_PER_CSV) {
                        // Current file is full: write it out and start collecting the next one.
                        exportCSV(headerList, csvPrefix + cvsNum + ".csv");
                        cvsNum++;
                        headerList = new ArrayList<Header>();
                    }
                }
            }
            // Write the remaining rows; when nothing was written yet, still emit a header-only file.
            if (!headerList.isEmpty() || cvsNum == 1) {
                exportCSV(headerList, csvPrefix + cvsNum + ".csv");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds one CSV row from a value element.
     *
     * @param v          element whose text holds whitespace-separated values
     * @param attribute1 value of the fileHeader attribute {@code Attribute1}
     * @param attribute2 value of the fileHeader attribute {@code Attribute2}
     * @param attribute3 value of the fileHeader attribute {@code Attribute3}
     * @return the populated row
     */
    private static Header toHeader(Element v, String attribute1, String attribute2, String attribute3) {
        Header header = new Header();
        header.setAttribute1(attribute1);
        header.setAttribute2(attribute2);
        header.setAttribute3(attribute3);

        String[] values = splitValues(v.getText());
        header.setaa(valueAt(values, 0));
        header.setab(valueAt(values, 1));
        header.setac(valueAt(values, 2));
        header.setad(valueAt(values, 3));
        header.setae(valueAt(values, 4));
        header.setaf(valueAt(values, 5));
        header.setag(valueAt(values, 6));
        header.setah(valueAt(values, 7));
        // NOTE(review): "av" is populated here but is not one of the exported CSV columns.
        header.setav(valueAt(values, 8));
        return header;
    }

    /**
     * Splits element text into its values, ignoring leading/trailing padding and repeated
     * whitespace so that padded text such as {@code " a b c "} does not shift the columns.
     */
    private static String[] splitValues(String text) {
        String trimmed = text == null ? "" : text.trim();
        return trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");
    }

    /** Returns the value at {@code index}, or a single space when it is missing or {@code "NIL"}. */
    private static String valueAt(String[] values, int index) {
        if (index >= values.length || "NIL".equals(values[index])) {
            return " ";
        }
        return values[index];
    }

    /**
     * Writes the given rows to a CSV file in the directory {@code Dom4jDemoTest.fileUrl}.
     *
     * <p>The header titles are derived from the exported field names so that titles and
     * columns can never get out of order.
     *
     * @param headerList  rows to export
     * @param csvFileName name of the CSV file to create
     */
    @SuppressWarnings("rawtypes")
    public static void exportCSV(List headerList, String csvFileName){
        // Keys are the 1-based column positions, values are the column titles.
        LinkedHashMap<String, String> map = new LinkedHashMap<String, String>();
        for (int i = 0; i < CSV_FIELDS.length; i++) {
            map.put(String.valueOf(i + 1), CSV_FIELDS[i]);
        }
        String path = Dom4jDemoTest.fileUrl; // CSV output directory
        String[] fileds = CSV_FIELDS.clone();

        CSVUtils.createCSVFile(headerList, fileds, map, path, csvFileName);
    }
}

导出csv的工具类


public class CSVUtils{

	/**
	 * 生成为CVS文件
	 * @param exportData 源数据List
	 * @param fileds 对应的实体类列
	 * @param map csv文件的列表头map
	 * @param outPutPath 文件路径
	 * @param csvFileName 文件名称
	 * @return
	 */
	@SuppressWarnings("rawtypes")
	public static File createCSVFile(List exportData, String[] fileds, LinkedHashMap map,String outPutPath, String csvFileName) {
		File csvFile = null;
		BufferedWriter csvFileOutputStream = null;
		try {
			File file = new File(outPutPath);
			if (!file.exists()) {
				file.mkdir();
			}
			// 定义文件名格式并创建
			csvFile = new File(outPutPath,csvFileName);
			System.out.println("csv文件导出路径为：" + csvFile);
			// UTF-8使正确读取分隔符","
			csvFileOutputStream = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(csvFile), "GBK"), 1024);
			//System.out.println("csvFileOutputStream" + csvFileOutputStream);

			//写入文件头部
			for (Iterator propertyIterator = map.entrySet().iterator(); propertyIterator.hasNext();) {
				java.util.Map.Entry propertyEntry = (java.util.Map.Entry) propertyIterator.next();
				csvFileOutputStream.write((String) propertyEntry.getValue() != null ? new String(
								((String) propertyEntry.getValue()).getBytes("GBK"), "GBK") : "");
				if (propertyIterator.hasNext()) {
					csvFileOutputStream.write(",");
				}
				//System.out.println(new String(((String) propertyEntry.getValue()).getBytes("GBK"), "GBK"));
			}
			csvFileOutputStream.write("\r\n");

			// 写入文件内容
			for (int j = 0; exportData != null && !exportData.isEmpty()
					&& j < exportData.size(); j++) {
				Header t = (Header) exportData.get(j);
				Class clazz = t.getClass();
				String[] contents = new String[fileds.length];
				for (int i = 0; fileds != null && i < fileds.length; i++) {
					String filedName = toUpperCaseFirstOne(fileds[i]);
					Method method = clazz.getMethod(filedName);
					method.setAccessible(true);
					Object obj = method.invoke(t);
					String str = String.valueOf(obj);
					if (str == null || str.equals("null"))
						str = "";
					contents[i] = str;
				}
				for (int n = 0; n < contents.length; n++) {
					// 将生成的单元格添加到工作表中
					csvFileOutputStream.write(contents[n]);
					csvFileOutputStream.write(",");
 
				}
				csvFileOutputStream.write("\r\n");
			}
			csvFileOutputStream.flush();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			try {
				csvFileOutputStream.close();	//关闭文件流
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
		return csvFile;
	}

	/**
	 * .下载文件
	 * @param response
	 * @param csvFilePath 文件路径
	 * @param fileName    文件名称
	 * @throws IOException
	 */
	public static void exportFile(HttpServletResponse response,
			String csvFilePath, String fileName) throws IOException {
		response.setContentType("application/csv;charset=GBK");
		response.setHeader("Content-Disposition", "attachment;  filename="
				+ new String(fileName.getBytes("GBK"), "ISO8859-1"));
		// URLEncoder.encode(fileName, "GBK")
 
		InputStream in = null;
		try {
			in = new FileInputStream(csvFilePath);
			int len = 0;
			byte[] buffer = new byte[1024];
			response.setCharacterEncoding("GBK");
			OutputStream out = response.getOutputStream();
			while ((len = in.read(buffer)) > 0) {
				// out.write(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF });
				out.write(buffer, 0, len);
			}
		} catch (FileNotFoundException e) {
			System.out.println(e);
		} finally {
			if (in != null) {
				try {
					in.close();
				} catch (Exception e) {
					throw new RuntimeException(e);
				}
			}
		}
	}

	/**
	 * 删除该目录filePath下的所有文件
	 * @param filePath 文件目录路径
	 */
	public static void deleteFiles(String filePath) {
		File file = new File(filePath);
		if (file.exists()) {
			File[] files = file.listFiles();
			for (int i = 0; i < files.length; i++) {
				if (files[i].isFile()) {
					files[i].delete();
				}
			}
		}
	}

	/**
	 * 删除单个文件
	 * @param filePath   文件目录路径
	 * @param fileName   文件名称
	 */
	public static void deleteFile(String filePath, String fileName) {
		File file = new File(filePath);
		if (file.exists()) {
			File[] files = file.listFiles();
			for (int i = 0; i < files.length; i++) {
				if (files[i].isFile()) {
					if (files[i].getName().equals(fileName)) {
						files[i].delete();
						return;
					}
				}
			}
		}
	}

	/**
	 * 将第一个字母转换为大写字母并和get拼合成方法
	 * @param origin
	 * @return
	 */
	private static String toUpperCaseFirstOne(String origin) {
		StringBuffer sb = new StringBuffer(origin);
		sb.setCharAt(0, Character.toUpperCase(sb.charAt(0)));
		sb.insert(0, "get");
		return sb.toString();
	}
}

测试类

ThreadPoolExecutorFactory是调用的异步线程类，因为要解析的文件比较多就用了线程

UnGzip是解压文件的工具类


public class Dom4jDemoTest {
	/** Directory entered by the user; XmlImportCsv.exportCSV also reads it as the CSV output directory. */
	public static String fileUrl;

	/**
	 * Asks for a directory on standard input, then for every file reported by
	 * {@code UnGzip.getAllFile} submits a task that unzips the file, exports the
	 * extracted XML to CSV and removes the extracted XML afterwards.
	 */
	public static void main(String [] args) {
		UnGzip gzip = new UnGzip();
		Scanner scanner  = new Scanner(System.in);
		System.out.print("请输入解析路径：");
		// Read the whole line: next() would stop at the first space and truncate
		// paths such as "D:/My Folder/MRO".
		fileUrl = scanner.nextLine().trim();
		List<String> fileUrls = gzip.getAllFile(fileUrl);	// paths of all files to process
		for (String fileAllUrl : fileUrls){
			ThreadPoolExecutorFactory.getInstance().run(() -> { // run each file on the async pool
				gzip.unGzipFile(fileAllUrl);	// unzip the archive
				// Strips the last three characters of the path — presumably the ".gz" suffix; TODO confirm.
				loadXmlImportCSV(fileAllUrl.substring(0,fileAllUrl.length()-3));  // parse the XML and export CSV
				// NOTE(review): this is called with the shared directory from every task; if it
				// removes all ".xml" files there it may delete files other tasks are still
				// parsing — verify against UnGzip.delxmlfile.
				gzip.delxmlfile(fileUrl,".xml");	// delete the extracted xml file(s)
			});
		}
	}
}

导出结果为：

señoritaw

关注

2
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
Dom4j解析xml文件并导出到csv中

DOM4J解析　　　特征：1、JDOM的一种智能分支，它合并了许多超出基本XML文档表示的功能。　　　　　　 2、它使用接口和抽象基本类方法。　　　　　　 3、具有性能优异、灵活性好、功能强大和极端易用的特点。　　　　　　 4、是一个开放源码的软件（类库）　xml文件：<bulkPmMrDataFile> <fileHeader Attribu...
复制链接

扫一扫