一种xml解析二次处理的方法(根据剔除规则仅保留部分二级节点)

处理前的xml文件:

<?xml version="1.0" encoding="UTF-8"?>
<product_set>
    <product>
        <product_id>614944313705</product_id>
        <click_sku_id>614944313705</click_sku_id>
        <product_name><![CDATA[oppo钢化水凝膜a3全屏覆盖a3s无白边a1防摔爆护眼抗蓝光膜oppoa3手机屏幕oqqo贴膜软膜oppoa3s保护膜oppoa1]]></product_name>
        <expiration_time>2022-12-14 01:12:14</expiration_time>
        <image_url><![CDATA[https://img.alicdn.com/imgextra/i1/2206948455447/O1CN01kkILjC1q6lxhX85iX_!!0-item_pic.jpg]]></image_url>
        <target_url_mobile><![CDATA[https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz=tm&force_no_smb=true&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&dpa_material_id=614944313705&dpa_material_type=1&dpa_source_code=10082&itemIds=614944313705&spm=2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_mobile>
        <target_url_android><![CDATA[tbopen://m.taobao.com/tbopen/index.html?action=ali.open.nav&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&bootImage=0&dpa_material_id=614944313705&dpa_material_type=1&dpa_source_code=10082&itemIds=614944313705&module=h5&source=auto&spm=2014.ugdhh.3843640202.100-450957-2089633783&h5Url=https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz%3Dtm%26force_no_smb%3Dtrue%26bc_fl_src%3Dgrowth_dhh_3843640202_100-450957-2089633783%26dpa_material_id%3D614944313705%26dpa_material_type%3D1%26dpa_source_code%3D10082%26itemIds%3D614944313705%26spm%3D2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_android>
        <target_url_ios><![CDATA[tbopen://m.taobao.com/tbopen/index.html?action=ali.open.nav&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&bootImage=0&dpa_material_id=614944313705&dpa_material_type=1&dpa_source_code=10082&itemIds=614944313705&module=h5&source=auto&spm=2014.ugdhh.3843640202.100-450957-2089633783&h5Url=https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz%3Dtm%26force_no_smb%3Dtrue%26bc_fl_src%3Dgrowth_dhh_3843640202_100-450957-2089633783%26dpa_material_id%3D614944313705%26dpa_material_type%3D1%26dpa_source_code%3D10082%26itemIds%3D614944313705%26spm%3D2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_ios>
        <product_short_name><![CDATA[总副 oppo a3水凝膜]]></product_short_name>
        <third_category_id>50012587</third_category_id>
        <third_category_name><![CDATA[手机贴膜]]></third_category_name>
        <price>10</price>
        <sale_price>10</sale_price>
    </product>
    <product>
        <product_id>615130411096</product_id>
        <click_sku_id>615130411096</click_sku_id>
        <product_name><![CDATA[小白嘴山药铁棍山药新鲜蔬菜铁杆淮山药5斤装非河南焦作温县垆土]]></product_name>
        <expiration_time>2022-12-14 01:12:14</expiration_time>
        <image_url><![CDATA[https://img.alicdn.com/imgextra/i2/2207816494036/O1CN01FQQp1Q1fgX74GLegt_!!0-item_pic.jpg]]></image_url>
        <target_url_mobile><![CDATA[https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz=tm&force_no_smb=true&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&dpa_material_id=615130411096&dpa_material_type=1&dpa_source_code=10082&itemIds=615130411096&spm=2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_mobile>
        <target_url_android><![CDATA[tbopen://m.taobao.com/tbopen/index.html?action=ali.open.nav&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&bootImage=0&dpa_material_id=615130411096&dpa_material_type=1&dpa_source_code=10082&itemIds=615130411096&module=h5&source=auto&spm=2014.ugdhh.3843640202.100-450957-2089633783&h5Url=https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz%3Dtm%26force_no_smb%3Dtrue%26bc_fl_src%3Dgrowth_dhh_3843640202_100-450957-2089633783%26dpa_material_id%3D615130411096%26dpa_material_type%3D1%26dpa_source_code%3D10082%26itemIds%3D615130411096%26spm%3D2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_android>
        <target_url_ios><![CDATA[tbopen://m.taobao.com/tbopen/index.html?action=ali.open.nav&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&bootImage=0&dpa_material_id=615130411096&dpa_material_type=1&dpa_source_code=10082&itemIds=615130411096&module=h5&source=auto&spm=2014.ugdhh.3843640202.100-450957-2089633783&h5Url=https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz%3Dtm%26force_no_smb%3Dtrue%26bc_fl_src%3Dgrowth_dhh_3843640202_100-450957-2089633783%26dpa_material_id%3D615130411096%26dpa_material_type%3D1%26dpa_source_code%3D10082%26itemIds%3D615130411096%26spm%3D2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_ios>
        <product_short_name><![CDATA[小白嘴山药铁棍山药新鲜蔬菜铁杆淮山药5斤]]></product_short_name>
        <third_category_id>50050721</third_category_id>
        <third_category_name><![CDATA[新鲜山药]]></third_category_name>
        <price>66</price>
        <sale_price>66</sale_price>
    </product>
    <product>
        <product_id>615818020475</product_id>
        <click_sku_id>615818020475</click_sku_id>
        <product_name><![CDATA[发光掏耳神器挖耳勺带灯宝宝儿童采耳掏耳朵淘扣工具套装可视耳屎]]></product_name>
        <expiration_time>2022-12-14 01:12:14</expiration_time>
        <image_url><![CDATA[https://img.alicdn.com/imgextra/i3/4185085665/O1CN017n2fTh1ricLAKCRgr_!!0-item_pic.jpg]]></image_url>
        <target_url_mobile><![CDATA[https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz=tm&force_no_smb=true&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&dpa_material_id=615818020475&dpa_material_type=1&dpa_source_code=10082&itemIds=615818020475&spm=2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_mobile>
        <target_url_android><![CDATA[tbopen://m.taobao.com/tbopen/index.html?action=ali.open.nav&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&bootImage=0&dpa_material_id=615818020475&dpa_material_type=1&dpa_source_code=10082&itemIds=615818020475&module=h5&source=auto&spm=2014.ugdhh.3843640202.100-450957-2089633783&h5Url=https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz%3Dtm%26force_no_smb%3Dtrue%26bc_fl_src%3Dgrowth_dhh_3843640202_100-450957-2089633783%26dpa_material_id%3D615818020475%26dpa_material_type%3D1%26dpa_source_code%3D10082%26itemIds%3D615818020475%26spm%3D2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_android>
        <target_url_ios><![CDATA[tbopen://m.taobao.com/tbopen/index.html?action=ali.open.nav&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&bootImage=0&dpa_material_id=615818020475&dpa_material_type=1&dpa_source_code=10082&itemIds=615818020475&module=h5&source=auto&spm=2014.ugdhh.3843640202.100-450957-2089633783&h5Url=https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz%3Dtm%26force_no_smb%3Dtrue%26bc_fl_src%3Dgrowth_dhh_3843640202_100-450957-2089633783%26dpa_material_id%3D615818020475%26dpa_material_type%3D1%26dpa_source_code%3D10082%26itemIds%3D615818020475%26spm%3D2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_ios>
        <product_short_name><![CDATA[PROMISE/承诺 008]]></product_short_name>
        <third_category_id>50007005</third_category_id>
        <third_category_name><![CDATA[耳勺]]></third_category_name>
        <price>38</price>
        <sale_price>38</sale_price>
    </product>
</product_set>

处理后的xml文件:

<?xml version="1.0" encoding="UTF-8"?>

<product_set>
    <product>
        <product_id>614944313705</product_id>
        <click_sku_id>614944313705</click_sku_id>
        <product_name><![CDATA[oppo钢化水凝膜a3全屏覆盖a3s无白边a1防摔爆护眼抗蓝光膜oppoa3手机屏幕oqqo贴膜软膜oppoa3s保护膜oppoa1]]></product_name>
        <expiration_time>2022-12-14 01:12:14</expiration_time>
        <image_url><![CDATA[https://img.alicdn.com/imgextra/i1/2206948455447/O1CN01kkILjC1q6lxhX85iX_!!0-item_pic.jpg]]></image_url>
        <target_url_mobile><![CDATA[https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz=tm&force_no_smb=true&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&dpa_material_id=614944313705&dpa_material_type=1&dpa_source_code=10082&itemIds=614944313705&spm=2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_mobile>
        <target_url_android><![CDATA[tbopen://m.taobao.com/tbopen/index.html?action=ali.open.nav&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&bootImage=0&dpa_material_id=614944313705&dpa_material_type=1&dpa_source_code=10082&itemIds=614944313705&module=h5&source=auto&spm=2014.ugdhh.3843640202.100-450957-2089633783&h5Url=https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz%3Dtm%26force_no_smb%3Dtrue%26bc_fl_src%3Dgrowth_dhh_3843640202_100-450957-2089633783%26dpa_material_id%3D614944313705%26dpa_material_type%3D1%26dpa_source_code%3D10082%26itemIds%3D614944313705%26spm%3D2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_android>
        <target_url_ios><![CDATA[tbopen://m.taobao.com/tbopen/index.html?action=ali.open.nav&bc_fl_src=growth_dhh_3843640202_100-450957-2089633783&bootImage=0&dpa_material_id=614944313705&dpa_material_type=1&dpa_source_code=10082&itemIds=614944313705&module=h5&source=auto&spm=2014.ugdhh.3843640202.100-450957-2089633783&h5Url=https://huodong.taobao.com/wow/z/usergrowth/outside/dpa_module?wh_biz%3Dtm%26force_no_smb%3Dtrue%26bc_fl_src%3Dgrowth_dhh_3843640202_100-450957-2089633783%26dpa_material_id%3D614944313705%26dpa_material_type%3D1%26dpa_source_code%3D10082%26itemIds%3D614944313705%26spm%3D2014.ugdhh.3843640202.100-450957-2089633783]]></target_url_ios>
        <product_short_name><![CDATA[总副 oppo a3水凝膜]]></product_short_name>
        <third_category_id>50012587</third_category_id>
        <third_category_name><![CDATA[手机贴膜]]></third_category_name>
        <price>10</price>
        <sale_price>10</sale_price>
    </product>
</product_set>

具体步骤

1、引入jar(dom4j 用于解析和输出 xml;核心代码还使用了 fastjson 的 JSON.toJSONString 打印匹配统计,需另行引入 fastjson 依赖)

<dependency>
    <!-- NOTE(review): with scope "test" this jar is presumably only on the test classpath,
         so the class below must live under the test sources; confirm this is intended. -->
    <!-- NOTE(review): dom4j 1.1 is a very old release; consider a maintained version
         (newer releases are published under groupId org.dom4j); verify before upgrading. -->
    <groupId>dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>1.1</version>
    <scope>test</scope>
</dependency>

2、核心代码

package com.shiyu.test;

import com.alibaba.fastjson.JSON;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

import java.io.*;
import java.util.*;

/**
 * Post-processes an XML product feed: reads {@code resources/test.xml}, keeps only the
 * second-level nodes (products) whose child-element text contains one of the keywords in
 * {@link #LABEL_LIST}, and writes the kept products to a new, pretty-printed XML file.
 *
 * <p>Each product is copied to the output at most once. It is credited to the first
 * keyword it matches that still has quota left, so the per-keyword statistics add up to
 * the number of kept products.
 *
 * @author shiyu
 * @since 2020/12/14
 */
public class MyTest {
	/** Comma-separated keywords; a product is kept when any child element's text contains one of them. */
	private static final String LABEL_LIST = "其他配件,剃须刀,口罩,台历,咸鸭蛋,围裙,地垫,地毯,垃圾桶,垃圾袋,定制窗帘,居家鞋,山核桃,成品窗帘,扇子,手机贴膜,护发素,护目镜,挂钟,松子,柠檬,椅垫,榴莲,沐浴球,沐浴露,沙发垫,洗发水,洗手液,洗护套装,浴巾,浴帘杆,海参,海绵擦,湿巾,漱口水,牙膏,牛排,番茄,百洁布,眼罩,眼霜,移动电源,管道疏通器,糖果,纽扣,芒果,芒果干,苹果,隔音耳塞,面膜,鞋垫,飘窗垫,餐垫,香皂";

	/** Maximum number of products kept for a single keyword. */
	private static final int MAX_PER_KEYWORD = 20;

	/** Maximum number of products kept in total. */
	private static final int MAX_TOTAL = 200;

	/**
	 * Entry point: runs {@link #test()} and prints the stack trace if it fails.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try {
			test();
		} catch(Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Filters {@code resources/test.xml} into {@code resources/test_<millis>.xml}.
	 *
	 * <p>Stops scanning once {@link #MAX_TOTAL} products have been kept, then prints the
	 * number of products scanned, the number kept, and the per-keyword counts as JSON.
	 *
	 * @throws Exception if the source cannot be parsed or the target cannot be written
	 */
	public static void test() throws Exception {
		File source = new File("resources/test.xml");
		File target = new File("resources/test_" + System.currentTimeMillis() + ".xml");

		int sum = 0, available = 0;
		String[] keywords = LABEL_LIST.split(",");
		Map<String, Integer> map = new HashMap<>();

		// Parse the source document and grab its root element.
		// NOTE(review): if the feed comes from an untrusted party, consider disabling
		// DOCTYPE/external entities on the reader before parsing.
		SAXReader reader = new SAXReader();
		Document doc = reader.read(source);
		Element root = doc.getRootElement();

		// The output document reuses the source root's qualified name.
		Document newDoc = DocumentHelper.createDocument();
		Element newRoot = newDoc.addElement(root.getQName());

		// Walk every second-level node (one per product).
		for (Object node : root.elements()) {
			sum++;
			Element product = (Element) node;

			// Print any attribute values on the product node (the sample feed has none).
			for (Object attribute : product.attributes()) {
				System.out.println(((Attribute) attribute).getValue());
			}

			String keyword = findKeyword(product, keywords, map);
			if (keyword != null) {
				Integer used = map.get(keyword);
				map.put(keyword, used == null ? 1 : used + 1);
				// Clone so the node can be attached to the new document; added once per product.
				newRoot.add((Element) product.clone());
				available++;
			}

			// Overall cap: stop as soon as MAX_TOTAL products have been kept.
			if (available >= MAX_TOTAL) break;
		}

		// Pretty-print with a 4-space indent.
		OutputFormat format = OutputFormat.createPrettyPrint();
		format.setIndentSize(4);

		// try-with-resources closes the file even if writing fails; flush pushes the
		// writer's buffered characters into the stream before it is closed.
		try (OutputStream out = new FileOutputStream(target)) {
			XMLWriter writer = new XMLWriter(out, format);
			writer.write(newDoc);
			writer.flush();
		}
		System.out.println("共:" + sum + ", 符合:" + available);
		System.out.println("匹配详情: " + JSON.toJSONString(map));
	}

	/**
	 * Finds the keyword a product should be credited to.
	 *
	 * <p>Child elements are checked in document order and keywords in list order; child
	 * text of length 0 or 1 is skipped. A keyword that has already reached
	 * {@link #MAX_PER_KEYWORD} is passed over so a later keyword may still match.
	 *
	 * @param product  the second-level element to inspect
	 * @param keywords candidate keywords
	 * @param counts   number of products already credited to each keyword (not modified)
	 * @return the first matching keyword with quota left, or {@code null} if there is none
	 */
	private static String findKeyword(Element product, String[] keywords, Map<String, Integer> counts) {
		for (Object child : product.elements()) {
			String text = ((Element) child).getStringValue();
			if (text.length() <= 1) {
				continue;
			}
			for (String keyword : keywords) {
				if (!text.contains(keyword)) {
					continue;
				}
				Integer used = counts.get(keyword);
				if (used == null || used < MAX_PER_KEYWORD) {
					return keyword;
				}
			}
		}
		return null;
	}
}

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值