XStream 测试(依赖 xstream-1.3.1.jar、xpp3_min-1.1.4c.jar)

package xstreamTest;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

import com.thoughtworks.xstream.XStream;
import com.thoughtworks.xstream.io.xml.DomDriver;

public class ExtractorXpathConfig {

    public static void main(String[] args) {
        ExtractorXpathConfig.write();
         ExtractorXpathConfig.read();
    }

    public static void write() {
        XStream sm = new XStream(new DomDriver());

        FilterConfig fc = new FilterConfig();
        List<SeedConfig> seedConfig = new ArrayList<SeedConfig>();
        List<ExtractorConfig> extratorConfig = new ArrayList<ExtractorConfig>();
        List<WriterConfig> writerConfig = new ArrayList<WriterConfig>();

        SeedConfig sc = new SeedConfig("http://www.qq.com/");
        SeedConfig sc1 = new SeedConfig("http://www.sina.com/");
        seedConfig.add(sc);
        seedConfig.add(sc1);

        ExtractorConfig ec = new ExtractorConfig(
                "只抓取配置div/a/@href|div/h1/a/@href", "只解析配置div/a/@href|",
                "即抓取又解析功能配置div/a/@href");
        extratorConfig.add(ec);

        WriterConfig wc = new WriterConfig("singerName",
                "div/a/@href|div/h1/a/");
        WriterConfig wc1 = new WriterConfig("singerGender",
                "div/a/@href|div/h1/gender");
        WriterConfig wc2 = new WriterConfig("singerAge",
                "div/a/@href|div/h1/age");
        WriterConfig wc3 = new WriterConfig("singerCountry",
                "div/a/@href|div/h1/age");
        wc3.setOtherConfig("福建省地方第三方");
        writerConfig.add(wc);
        writerConfig.add(wc1);
        writerConfig.add(wc2);
        writerConfig.add(wc3);

        fc.setExtratorConfig(extratorConfig);
        fc.setSeedConfig(seedConfig);
        fc.setWriterConfig(writerConfig);

        try {
            OutputStream out = new FileOutputStream(new File("xpathConfig.xml"));
            OutputStreamWriter writer = new OutputStreamWriter(out, Charset
                    .forName("utf-8"));
            writer.write("/n");
            sm.toXML(fc, writer);
            out.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void read() {
        XStream sm = new XStream(new DomDriver());
        try {
            InputStream in = new FileInputStream(new File("xpathConfig.xml"));
            InputStreamReader reader = new InputStreamReader(in, Charset
                    .forName("utf-8"));
            FilterConfig fc = (FilterConfig) sm.fromXML(in);
            System.out.println(fc.getCharSet());
            List<ExtractorConfig> extratorConfig = fc.getExtratorConfig();
            for(ExtractorConfig ec:extratorConfig){
                System.out.println(ec.getBothUrls());
                System.out.println(ec.getFetchUrls());
                System.out.println(ec.getWriteUrls());
            }
           
            in.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

/**
 * 配置模板类
 *
 * @author ffshi
 *
 */
class FilterConfig {
    private List<SeedConfig> seedConfig;
    private List<ExtractorConfig> extratorConfig;
    private List<WriterConfig> writerConfig;
    private String charSet = "UTF-8";

    public String getCharSet() {
        return charSet;
    }

    public void setCharSet(String charSet) {
        this.charSet = charSet;
    }

    public FilterConfig() {
        seedConfig = new ArrayList<SeedConfig>();
        extratorConfig = new ArrayList<ExtractorConfig>();
        writerConfig = new ArrayList<WriterConfig>();
    }

    public List<SeedConfig> getSeedConfig() {
        return seedConfig;
    }

    public void setSeedConfig(List<SeedConfig> seedConfig) {
        this.seedConfig = seedConfig;
    }

    public List<ExtractorConfig> getExtratorConfig() {
        return extratorConfig;
    }

    public void setExtratorConfig(List<ExtractorConfig> extratorConfig) {
        this.extratorConfig = extratorConfig;
    }

    public List<WriterConfig> getWriterConfig() {
        return writerConfig;
    }

    public void setWriterConfig(List<WriterConfig> writerConfig) {
        this.writerConfig = writerConfig;
    }

}

/**
 * 种子页设置bean
 *
 * @author ffshi
 *
 */
class SeedConfig {
    private String seed;

    public SeedConfig() {
    }

    public String getSeed() {
        return seed;
    }

    public void setSeed(String seed) {
        this.seed = seed;
    }

    public SeedConfig(String seed) {
        super();
        this.seed = seed;
    }

}

/**
 * URL filter configuration: three independent string settings.
 *
 * <p>Field names and their declaration order are deliberately left as they
 * are; the XML sample accompanying this class uses the field names as
 * element names, in this order.
 *
 * @author ffshi
 */
class ExtractorConfig {
    private String fetchUrls;
    private String writeUrls;
    private String bothUrls;

    /** Creates a configuration with all three settings unset ({@code null}). */
    public ExtractorConfig() {
    }

    /**
     * Creates a fully populated configuration.
     *
     * @param fetchUrls value for the fetch setting
     * @param writeUrls value for the write setting
     * @param bothUrls  value for the combined setting
     */
    public ExtractorConfig(String fetchUrls, String writeUrls, String bothUrls) {
        this.fetchUrls = fetchUrls;
        this.writeUrls = writeUrls;
        this.bothUrls = bothUrls;
    }

    /** @return the fetch setting */
    public String getFetchUrls() {
        return this.fetchUrls;
    }

    /** @param value the new fetch setting */
    public void setFetchUrls(String value) {
        this.fetchUrls = value;
    }

    /** @return the write setting */
    public String getWriteUrls() {
        return this.writeUrls;
    }

    /** @param value the new write setting */
    public void setWriteUrls(String value) {
        this.writeUrls = value;
    }

    /** @return the combined setting */
    public String getBothUrls() {
        return this.bothUrls;
    }

    /** @param value the new combined setting */
    public void setBothUrls(String value) {
        this.bothUrls = value;
    }

}

/**
 * Configuration for structured extraction: a field name, its XPath, an
 * optional extra setting and a boolean flag.
 *
 * <p>Field names and their declaration order are deliberately left as they
 * are; the XML sample accompanying this class uses the field names as
 * element names, in this order.
 *
 * @author ffshi
 */
class WriterConfig {
    private String fieldName;
    private String fieldXpath;
    private String otherConfig;
    private boolean bool;

    /** Creates a configuration with nothing set (strings {@code null}, flag false). */
    public WriterConfig() {
    }

    /**
     * Creates a configuration for one field; the extra setting stays
     * {@code null} and the flag stays {@code false}.
     *
     * @param fieldName  name of the field
     * @param fieldXpath XPath associated with the field
     */
    public WriterConfig(String fieldName, String fieldXpath) {
        this.fieldName = fieldName;
        this.fieldXpath = fieldXpath;
    }

    /** @return the field name */
    public String getFieldName() {
        return this.fieldName;
    }

    /** @param name the new field name */
    public void setFieldName(String name) {
        this.fieldName = name;
    }

    /** @return the XPath associated with the field */
    public String getFieldXpath() {
        return this.fieldXpath;
    }

    /** @param xpath the new XPath */
    public void setFieldXpath(String xpath) {
        this.fieldXpath = xpath;
    }

    /** @return the extra setting, or {@code null} if none was set */
    public String getOtherConfig() {
        return this.otherConfig;
    }

    /** @param other the new extra setting */
    public void setOtherConfig(String other) {
        this.otherConfig = other;
    }

    /** @return the current value of the flag */
    public boolean isBool() {
        return this.bool;
    }

    /** @param flag the new value of the flag */
    public void setBool(boolean flag) {
        this.bool = flag;
    }

}

 

生成的 XML 文件(xpathConfig.xml)格式如下:


<xstreamTest.FilterConfig>
  <seedConfig>
    <xstreamTest.SeedConfig>
      <seed>http://www.qq.com/</seed>
    </xstreamTest.SeedConfig>
    <xstreamTest.SeedConfig>
      <seed>http://www.sina.com/</seed>
    </xstreamTest.SeedConfig>
  </seedConfig>
  <extratorConfig>
    <xstreamTest.ExtractorConfig>
      <fetchUrls>只抓取配置div/a/@href|div/h1/a/@href</fetchUrls>
      <writeUrls>只解析配置div/a/@href|</writeUrls>
      <bothUrls>即抓取又解析功能配置div/a/@href</bothUrls>
    </xstreamTest.ExtractorConfig>
  </extratorConfig>
  <writerConfig>
    <xstreamTest.WriterConfig>
      <fieldName>singerName</fieldName>
      <fieldXpath>div/a/@href|div/h1/a/</fieldXpath>
      <bool>false</bool>
    </xstreamTest.WriterConfig>
    <xstreamTest.WriterConfig>
      <fieldName>singerGender</fieldName>
      <fieldXpath>div/a/@href|div/h1/gender</fieldXpath>
      <bool>false</bool>
    </xstreamTest.WriterConfig>
    <xstreamTest.WriterConfig>
      <fieldName>singerAge</fieldName>
      <fieldXpath>div/a/@href|div/h1/age</fieldXpath>
      <bool>false</bool>
    </xstreamTest.WriterConfig>
    <xstreamTest.WriterConfig>
      <fieldName>singerCountry</fieldName>
      <fieldXpath>div/a/@href|div/h1/age</fieldXpath>
      <otherConfig>福建省地方第三方</otherConfig>
      <bool>false</bool>
    </xstreamTest.WriterConfig>
  </writerConfig>
  <charSet>UTF-8</charSet>
</xstreamTest.FilterConfig>

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值