解析 XML 文件，并按元素数量将其分割成 n 个文件

/**
 * Splits the {@code Report_Entry} elements of the hard-coded input file into a series of
 * output files named {@code output_1.xml}, {@code output_2.xml}, ... in the working directory,
 * each holding at most {@code batchSize} entries wrapped in a {@code Report_Data} root.
 *
 * <p>Any failure (parsing or I/O) is reported on standard error via the stack trace.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    try {
        int batchSize = 1000; // number of elements per output file
        File inputFile = new File("D:\\demo.xml"); // input file
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(inputFile);
        doc.getDocumentElement().normalize();
        NodeList nodeList = doc.getElementsByTagName("Report_Entry"); // name of the element to split on

        int totalElements = nodeList.getLength();
        int currentFileIndex = 1;

        for (int i = 0; i < totalElements; i += batchSize) {
            int endIndex = Math.min(i + batchSize, totalElements);
            String fileName = "output_" + currentFileIndex + ".xml"; // name of this output file

            // Write explicitly as UTF-8 so the bytes match the encoding declared in the XML
            // header; try-with-resources closes the file even when a batch fails half-way.
            try (java.io.Writer fileWriter = new java.io.OutputStreamWriter(
                    new java.io.FileOutputStream(fileName), java.nio.charset.StandardCharsets.UTF_8)) {
                fileWriter.write("<?xml version='1.0' encoding='UTF-8'?><Report_Data xmlns:demo=\"urn:report/YL_test_worker\">\n");

                for (int j = i; j < endIndex; j++) {
                    Element element = (Element) nodeList.item(j);
                    fileWriter.write(getElementXML(element));
                }

                // Close the root element opened in the header so the file is well-formed.
                fileWriter.write("</Report_Data>\n");
            }

            // No early exit here: every batch must be written, not just the first one.
            currentFileIndex++;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

private static String getElementXML(Element element) {
    String xml = "";
    try {
        xml += "<demo:Report_Entry>\n";
        xml += "<demo:WorkerID>" + element.getElementsByTagName("demo:WorkerID").item(0).getTextContent()+ "</demo:WorkerID>\n";
        xml += "<demo:Full_Name>" + element.getElementsByTagName("demo:Full_Name").item(0).getTextContent() + "</wd:Full_Name>\n";
        if (null != element.getElementsByTagName("demo:BUName") && null != element.getElementsByTagName("demo:BUName").item(0)){
            xml += "<demo:BUName>" + element.getElementsByTagName("demo:BUName").item(0).getTextContent() + "</demo:BUName>\n";
        }
        





        xml += "<demo:OrgName>" + element.getElementsByTagName("demo:OrgName").item(0).getTextContent() + "</demo:OrgName>\n";
        xml += "<demo:OrgName>" + element.getElementsByTagName("demo:OrgName").item(0).getTextContent() + "</wd:OrgName>\n";
        xml += "<wd:OrgID>" + element.getElementsByTagName("demo:OrgID").item(0).getTextContent() + "</wd:OrgID>\n";

        xml += "<demo:Country wd:Descriptor= '" + element.getElementsByTagName("demo:Country").item(0).getAttributes().item(0).getTextContent() + "'>";
        xml += "<wd:ID wd:type=\"WID\">" +element.getElementsByTagName("demo:Country").item(0).getChildNodes().item(1).getTextContent() + "</wd:ID>";
        xml += "<demo:ID wd:type=\"ISO_3166-1_Alpha-2_Code\">" + element.getElementsByTagName("demo:Country").item(0).getChildNodes().item(1).getTextContent() + "</wd:ID>>";
        xml += "<wd:ID wd:type=\"ISO_3166-1_Alpha-3_Code\">" + element.getElementsByTagName("wd:Country").item(0).getChildNodes().item(2).getTextContent() + "</wd:ID>>";
        xml += "<demo:ID wd:type=\"ISO_3166-1_Numeric-3_Code\">" + element.getElementsByTagName("demo:Country").item(0).getChildNodes().item(3).getTextContent() + "</wd:ID>>";
        xml += "</wd:Country>";
        
        xml += "<wd:CurrentCCCode>" + element.getElementsByTagName("demo:CurrentCCCode").item(0).getTextContent() + "</wd:CurrentCCCode>\n";
        xml += "<wd:CurrentCCName>" + element.getElementsByTagName("demo:CurrentCCName").item(0).getTextContent() + "</wd:CurrentCCName>\n";
        xml += "<wd:IsRehire>" + element.getElementsByTagName("demo:IsRehire").item(0).getTextContent() + "</wd:IsRehire>\n";
        xml += "<wd:HireDate>" + element.getElementsByTagName("demo:HireDate").item(0).getTextContent() + "</wd:HireDate>\n";
        if (null != element.getElementsByTagName("wd:CCChangeDate") && null != element.getElementsByTagName("demo:CCChangeDate").item(0)){
            xml += "<demo:CCChangeDate>" + element.getElementsByTagName("demo:CCChangeDate").item(0).getTextContent() + "</demo:CCChangeDate>\n";
        }
        if (null != element.getElementsByTagName("demo:ProposeCCCode") && null != element.getElementsByTagName("demo:ProposeCCCode").item(0)){
            xml += "<wd:ProposeCCCode>" + element.getElementsByTagName(demo:ProposeCCCode").item(0).getTextContent() + "</demo:ProposeCCCode>\n";
        }
        if (null != element.getElementsByTagName("demo:ProposeCCName") && null != element.getElementsByTagName("demo:ProposeCCName").item(0)){
            xml += "<wd:ProposeCCName>" + element.getElementsByTagName("demo:ProposeCCName").item(0).getTextContent() + "</demo:ProposeCCName>\n";
        }
        xml += "</demo:Report_Entry>\n";
    } catch (Exception e) {
        e.printStackTrace();
    }
    return xml;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值