public static void main(String[] args) { try { int batchSize = 1000; //每个文件包含的元素数量 File inputFile = new File("D:\\demo.xml"); //输入文件 DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); Document doc = dBuilder.parse(inputFile); doc.getDocumentElement().normalize(); NodeList nodeList = doc.getElementsByTagName("Report_Entry"); //指定要分割的元素名称 int totalElements = nodeList.getLength(); int currentFileIndex = 1; for (int i = 0; i < totalElements; i += batchSize) { int endIndex = Math.min(i + batchSize, totalElements); String fileName = "output_" + currentFileIndex + ".xml"; //每个输出文件的名称 FileWriter fileWriter = new FileWriter(fileName); fileWriter.write("<?xml version='1.0' encoding='UTF-8'?><Report_Data xmlns:demo=\"urn:report/YL_test_worker\">\n"); for (int j = i; j < endIndex; j++) { Element element = (Element) nodeList.item(j); fileWriter.write(getElementXML(element)); } fileWriter.close(); currentFileIndex++; break; } } catch (Exception e) { e.printStackTrace(); } } private static String getElementXML(Element element) { String xml = ""; try { xml += "<demo:Report_Entry>\n"; xml += "<demo:WorkerID>" + element.getElementsByTagName("demo:WorkerID").item(0).getTextContent()+ "</demo:WorkerID>\n"; xml += "<demo:Full_Name>" + element.getElementsByTagName("demo:Full_Name").item(0).getTextContent() + "</wd:Full_Name>\n"; if (null != element.getElementsByTagName("demo:BUName") && null != element.getElementsByTagName("demo:BUName").item(0)){ xml += "<demo:BUName>" + element.getElementsByTagName("demo:BUName").item(0).getTextContent() + "</demo:BUName>\n"; } xml += "<demo:OrgName>" + element.getElementsByTagName("demo:OrgName").item(0).getTextContent() + "</demo:OrgName>\n"; xml += "<demo:OrgName>" + element.getElementsByTagName("demo:OrgName").item(0).getTextContent() + "</wd:OrgName>\n"; xml += "<wd:OrgID>" + element.getElementsByTagName("demo:OrgID").item(0).getTextContent() + "</wd:OrgID>\n"; xml 
+= "<demo:Country wd:Descriptor= '" + element.getElementsByTagName("demo:Country").item(0).getAttributes().item(0).getTextContent() + "'>"; xml += "<wd:ID wd:type=\"WID\">" +element.getElementsByTagName("demo:Country").item(0).getChildNodes().item(1).getTextContent() + "</wd:ID>"; xml += "<demo:ID wd:type=\"ISO_3166-1_Alpha-2_Code\">" + element.getElementsByTagName("demo:Country").item(0).getChildNodes().item(1).getTextContent() + "</wd:ID>>"; xml += "<wd:ID wd:type=\"ISO_3166-1_Alpha-3_Code\">" + element.getElementsByTagName("wd:Country").item(0).getChildNodes().item(2).getTextContent() + "</wd:ID>>"; xml += "<demo:ID wd:type=\"ISO_3166-1_Numeric-3_Code\">" + element.getElementsByTagName("demo:Country").item(0).getChildNodes().item(3).getTextContent() + "</wd:ID>>"; xml += "</wd:Country>"; xml += "<wd:CurrentCCCode>" + element.getElementsByTagName("demo:CurrentCCCode").item(0).getTextContent() + "</wd:CurrentCCCode>\n"; xml += "<wd:CurrentCCName>" + element.getElementsByTagName("demo:CurrentCCName").item(0).getTextContent() + "</wd:CurrentCCName>\n"; xml += "<wd:IsRehire>" + element.getElementsByTagName("demo:IsRehire").item(0).getTextContent() + "</wd:IsRehire>\n"; xml += "<wd:HireDate>" + element.getElementsByTagName("demo:HireDate").item(0).getTextContent() + "</wd:HireDate>\n"; if (null != element.getElementsByTagName("wd:CCChangeDate") && null != element.getElementsByTagName("demo:CCChangeDate").item(0)){ xml += "<demo:CCChangeDate>" + element.getElementsByTagName("demo:CCChangeDate").item(0).getTextContent() + "</demo:CCChangeDate>\n"; } if (null != element.getElementsByTagName("demo:ProposeCCCode") && null != element.getElementsByTagName("demo:ProposeCCCode").item(0)){ xml += "<wd:ProposeCCCode>" + element.getElementsByTagName(demo:ProposeCCCode").item(0).getTextContent() + "</demo:ProposeCCCode>\n"; } if (null != element.getElementsByTagName("demo:ProposeCCName") && null != element.getElementsByTagName("demo:ProposeCCName").item(0)){ xml += 
"<wd:ProposeCCName>" + element.getElementsByTagName("demo:ProposeCCName").item(0).getTextContent() + "</demo:ProposeCCName>\n"; } xml += "</demo:Report_Entry>\n"; } catch (Exception e) { e.printStackTrace(); } return xml; }
解析 XML 文件，并按指定元素将其分割成 n 个文件
于 2023-03-16 18:20:28 首次发布