DOM4J解析
特征:1、JDOM的一种智能分支,它合并了许多超出基本XML文档表示的功能。
2、它使用接口和抽象基本类方法。
3、具有性能优异、灵活性好、功能强大和极端易用的特点。
4、是一个开放源码的软件
xml文件:
<bulkPmMrDataFile>
<fileHeader Attribute1 = "Value1" Attribute2 = "Value2" Attribute3 = "Value3" > </fileHeader>
<eNB id="123456">
<measurement>
<smr> AA AB AC AD AE AF AG AH </smr>
<object Attr1="val1" Attr2 = "val2" Attr3 ="val3">
<v> a b c d e f g h </v>
</object>
<object Attr1="val1" Attr2 = "val2" Attr3 ="val3">
<v> c d e d e f g h </v>
<v> e f g h a b c d </v>
</object>
</measurement>
<measurement>
<smr> aa ab ac ad ae af ag ah av </smr>
<object Attr1="val1" Attr2 = "val2" Attr3 ="val3">
<v> a b c d e f g h </v>
</object>
</measurement>
</eNB>
</bulkPmMrDataFile>
需求:将xml文件解析后导出为csv格式:以fileHeader的属性名为表头、属性值为列值;以smr标签的内容为表头、v标签的内容为对应的列值
Header是所有表头的实体类
主要代码:
/**
 * Parses an MR-style XML file with dom4j and exports the values of its
 * {@code v} elements as CSV rows, one {@link Header} per row.
 */
public class XmlImportCsv {

    /** Maximum number of data rows written to one CSV file before a new file is started. */
    private static final int MAX_ROWS_PER_CSV = 1000000;

    /** Token inside a {@code v} element that marks a missing value. */
    private static final String NIL = "NIL";

    /**
     * Parses the XML file at {@code fileUrl} and exports its rows to one or more CSV files.
     *
     * <p>Rows are taken from every child element of each {@code object} element under the first
     * {@code measurement} of each {@code eNB}. Files are named {@code MRO_<eNB id>_<n>.csv},
     * with {@code n} starting at 1, and each file holds at most {@link #MAX_ROWS_PER_CSV} rows.
     * If the XML file does not exist the method does nothing. Any parsing or export error is
     * printed and the rows collected so far are discarded.
     *
     * @param fileUrl path of the XML file to parse
     */
    public static void loadXmlImportCSV(String fileUrl){
        File xmlFile = new File(fileUrl);
        if (!xmlFile.exists()) {
            return;
        }
        List<Header> headerList = new ArrayList<Header>();
        try {
            SAXReader saxreader = new SAXReader();
            Document document = saxreader.read(xmlFile);
            Element root = document.getRootElement();

            // These values are identical for every row, so read them once.
            String enbId = root.element("eNB").attributeValue("id");
            Element fileHeader = root.element("fileHeader");
            String attribute1 = fileHeader == null ? null : fileHeader.attributeValue("Attribute1");
            String attribute2 = fileHeader == null ? null : fileHeader.attributeValue("Attribute2");
            String attribute3 = fileHeader == null ? null : fileHeader.attributeValue("Attribute3");

            // All object elements below the first measurement element.
            List<Element> objectList = document.selectNodes("//bulkPmMrDataFile//eNB//measurement[1]//object");

            int csvNum = 1; // sequence number of the CSV file currently being filled
            for (int i = 0; i < objectList.size(); i++) {
                Element object = objectList.get(i);
                Iterator<Element> objectIterator = object.elementIterator();
                while (objectIterator.hasNext()) {
                    Element v = objectIterator.next();
                    headerList.add(buildHeader(v.getText(), attribute1, attribute2, attribute3));
                    // Flush a full chunk immediately so no file exceeds the row limit
                    // and memory use stays bounded.
                    if (headerList.size() >= MAX_ROWS_PER_CSV) {
                        exportCSV(headerList, buildCsvFileName(enbId, csvNum));
                        csvNum++;
                        headerList = new ArrayList<Header>();
                    }
                }
            }
            // Export the remaining rows; when there were no rows at all,
            // still produce the first file with only the header line.
            if (!headerList.isEmpty() || csvNum == 1) {
                exportCSV(headerList, buildCsvFileName(enbId, csvNum));
            }
        } catch (Exception e) {
            e.printStackTrace();
            headerList.clear();
        }
    }

    /** Builds the CSV file name for the given eNB id and file sequence number. */
    private static String buildCsvFileName(String enbId, int csvNum) {
        return "MRO" + "_" + enbId + "_" + csvNum + ".csv";
    }

    /** Creates one row object from the text of a {@code v} element and the file header attributes. */
    private static Header buildHeader(String vText, String attribute1, String attribute2, String attribute3) {
        Header header = new Header();
        header.setAttribute1(attribute1);
        header.setAttribute2(attribute2);
        header.setAttribute3(attribute3);
        // Trim and split on any whitespace run so that padding around or between
        // the values does not shift them into the wrong columns.
        String[] arr = vText == null ? new String[0] : vText.trim().split("\\s+");
        header.setaa(valueAt(arr, 0));
        header.setab(valueAt(arr, 1));
        header.setac(valueAt(arr, 2));
        header.setad(valueAt(arr, 3));
        header.setae(valueAt(arr, 4));
        header.setaf(valueAt(arr, 5));
        header.setag(valueAt(arr, 6));
        header.setah(valueAt(arr, 7));
        // NOTE(review): av is populated here but exportCSV does not list an AV column - confirm intent.
        header.setav(valueAt(arr, 8));
        return header;
    }

    /** Returns the token at {@code index}, or a single blank when it is absent or equals NIL. */
    private static String valueAt(String[] arr, int index) {
        if (index >= arr.length || NIL.equals(arr[index])) {
            return " ";
        }
        return arr[index];
    }

    /**
     * Writes the given rows to a CSV file in the directory {@code Dom4jDemoTest.fileUrl}.
     *
     * @param headerList  rows to export
     * @param csvFileName name of the CSV file to create
     */
    public static void exportCSV(List headerList, String csvFileName){
        // Column titles, in the same order as the getter names in fileds below.
        LinkedHashMap<String, String> map = new LinkedHashMap<String, String>();
        map.put("1", "Attribute1");
        map.put("2", "Attribute2");
        map.put("3", "Attribute3");
        map.put("4", "AA");
        map.put("5", "AB");
        map.put("6", "AC");
        map.put("7", "AD");
        map.put("8", "AE");
        map.put("9", "AF");
        map.put("10", "AG");
        map.put("11", "AH");
        String path = Dom4jDemoTest.fileUrl;
        // Property names; CSVUtils turns each one into a getter name by prefixing "get".
        String fileds[] = new String[]{"Attribute1", "Attribute2", "Attribute3", "AA", "AB", "AC",
                "AD", "AE", "AF", "AG","AH"};
        CSVUtils.createCSVFile(headerList, fileds, map, path, csvFileName);
    }
}
导出csv的工具类
public class CSVUtils{
/**
* 生成为CVS文件
* @param exportData 源数据List
* @param fileds 对应的实体类列
* @param map csv文件的列表头map
* @param outPutPath 文件路径
* @param csvFileName 文件名称
* @return
*/
@SuppressWarnings("rawtypes")
public static File createCSVFile(List exportData, String[] fileds, LinkedHashMap map,String outPutPath, String csvFileName) {
File csvFile = null;
BufferedWriter csvFileOutputStream = null;
try {
File file = new File(outPutPath);
if (!file.exists()) {
file.mkdir();
}
// 定义文件名格式并创建
csvFile = new File(outPutPath,csvFileName);
System.out.println("csv文件导出路径为:" + csvFile);
// UTF-8使正确读取分隔符","
csvFileOutputStream = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(csvFile), "GBK"), 1024);
//System.out.println("csvFileOutputStream" + csvFileOutputStream);
//写入文件头部
for (Iterator propertyIterator = map.entrySet().iterator(); propertyIterator.hasNext();) {
java.util.Map.Entry propertyEntry = (java.util.Map.Entry) propertyIterator.next();
csvFileOutputStream.write((String) propertyEntry.getValue() != null ? new String(
((String) propertyEntry.getValue()).getBytes("GBK"), "GBK") : "");
if (propertyIterator.hasNext()) {
csvFileOutputStream.write(",");
}
//System.out.println(new String(((String) propertyEntry.getValue()).getBytes("GBK"), "GBK"));
}
csvFileOutputStream.write("\r\n");
// 写入文件内容
for (int j = 0; exportData != null && !exportData.isEmpty()
&& j < exportData.size(); j++) {
Header t = (Header) exportData.get(j);
Class clazz = t.getClass();
String[] contents = new String[fileds.length];
for (int i = 0; fileds != null && i < fileds.length; i++) {
String filedName = toUpperCaseFirstOne(fileds[i]);
Method method = clazz.getMethod(filedName);
method.setAccessible(true);
Object obj = method.invoke(t);
String str = String.valueOf(obj);
if (str == null || str.equals("null"))
str = "";
contents[i] = str;
}
for (int n = 0; n < contents.length; n++) {
// 将生成的单元格添加到工作表中
csvFileOutputStream.write(contents[n]);
csvFileOutputStream.write(",");
}
csvFileOutputStream.write("\r\n");
}
csvFileOutputStream.flush();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
csvFileOutputStream.close(); //关闭文件流
} catch (IOException e) {
e.printStackTrace();
}
}
return csvFile;
}
/**
* .下载文件
* @param response
* @param csvFilePath 文件路径
* @param fileName 文件名称
* @throws IOException
*/
public static void exportFile(HttpServletResponse response,
String csvFilePath, String fileName) throws IOException {
response.setContentType("application/csv;charset=GBK");
response.setHeader("Content-Disposition", "attachment; filename="
+ new String(fileName.getBytes("GBK"), "ISO8859-1"));
// URLEncoder.encode(fileName, "GBK")
InputStream in = null;
try {
in = new FileInputStream(csvFilePath);
int len = 0;
byte[] buffer = new byte[1024];
response.setCharacterEncoding("GBK");
OutputStream out = response.getOutputStream();
while ((len = in.read(buffer)) > 0) {
// out.write(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF });
out.write(buffer, 0, len);
}
} catch (FileNotFoundException e) {
System.out.println(e);
} finally {
if (in != null) {
try {
in.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
}
/**
* 删除该目录filePath下的所有文件
* @param filePath 文件目录路径
*/
public static void deleteFiles(String filePath) {
File file = new File(filePath);
if (file.exists()) {
File[] files = file.listFiles();
for (int i = 0; i < files.length; i++) {
if (files[i].isFile()) {
files[i].delete();
}
}
}
}
/**
* 删除单个文件
* @param filePath 文件目录路径
* @param fileName 文件名称
*/
public static void deleteFile(String filePath, String fileName) {
File file = new File(filePath);
if (file.exists()) {
File[] files = file.listFiles();
for (int i = 0; i < files.length; i++) {
if (files[i].isFile()) {
if (files[i].getName().equals(fileName)) {
files[i].delete();
return;
}
}
}
}
}
/**
* 将第一个字母转换为大写字母并和get拼合成方法
* @param origin
* @return
*/
private static String toUpperCaseFirstOne(String origin) {
StringBuffer sb = new StringBuffer(origin);
sb.setCharAt(0, Character.toUpperCase(sb.charAt(0)));
sb.insert(0, "get");
return sb.toString();
}
}
测试类
ThreadPoolExecutorFactory是调用的异步线程类,因为要解析的文件比较多就用了线程
UnGzip是解压文件的工具类
/**
 * Command-line entry point: asks for a directory, then decompresses, parses and
 * exports every listed file on the shared thread pool.
 */
public class Dom4jDemoTest {
    /** Directory entered by the user; also read by XmlImportCsv.exportCSV as the CSV output directory. */
    public static String fileUrl;

    /**
     * Reads the directory path from standard input and submits one task per file
     * returned by {@code UnGzip.getAllFile}.
     *
     * @param args unused
     */
    public static void main(String [] args) {
        UnGzip gzip = new UnGzip();
        Scanner scanner = new Scanner(System.in);
        System.out.print("请输入解析路径:");
        fileUrl = scanner.next();
        List<String> fileUrls = gzip.getAllFile(fileUrl); // paths of all files to process
        for (String fileAllUrl : fileUrls){
            // The XML path is derived by stripping the ".gz" suffix, so skip anything else
            // instead of cutting three arbitrary characters off the name.
            int suffixStart = fileAllUrl.length() - 3;
            if (suffixStart <= 0 || !fileAllUrl.regionMatches(true, suffixStart, ".gz", 0, 3)) {
                continue;
            }
            final String xmlPath = fileAllUrl.substring(0, suffixStart);
            ThreadPoolExecutorFactory.getInstance().run(() -> {
                try {
                    gzip.unGzipFile(fileAllUrl);
                    loadXmlImportCSV(xmlPath);
                } finally {
                    // Remove only the XML this task extracted. A directory-wide cleanup here
                    // could delete files that other tasks are still parsing.
                    // NOTE(review): assumes unGzipFile writes its output to xmlPath - confirm.
                    new java.io.File(xmlPath).delete();
                }
            });
        }
    }
}
导出结果为: