SAXParser

package com.RSSReader.parser;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that downloads an RSS feed from a URL and collects it into a
 * {@link Channel} containing one {@link Item} per {@code <item>} element.
 *
 * <p>Usage: construct with the feed URL, call {@link #parse()}, then read the
 * result with {@link #getChannel()}.
 *
 * <p>Parsing state is kept in mutable instance fields, so a single instance
 * must not be used for concurrent parses.
 */
public class RssParser extends DefaultHandler {

    /**
     * RFC 822 date pattern used by RSS {@code pubDate}. Hours use the 24-hour
     * clock ({@code HH}); with {@code hh} a time such as {@code 12:30:00} would
     * be read as {@code 00:30:00}.
     */
    public static final String RSS_FORMAT_DATE = "EEE, d MMM yyyy HH:mm:ss Z";

    /** Parser features that are switched off so a feed cannot pull in external entities or DTDs. */
    private static final String[] EXTERNAL_ENTITY_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    /** Address of the feed to download. */
    private final String urlString;
    /** Character data collected for the element currently being read. */
    private final StringBuilder text;
    /** Channel being built; {@code null} until a {@code <channel>} element is seen. */
    private Channel channel;
    /** Item currently being read, or {@code null} when outside any {@code <item>}. */
    private Item item;
    /** {@code true} while inside the channel's {@code <image>} element. */
    private boolean imgStatus;

    /**
     * Creates a parser for the feed at the given address. Nothing is downloaded
     * until {@link #parse()} is called.
     *
     * @param url address of the RSS feed
     */
    public RssParser(String url) {
        this.urlString = url;
        this.text = new StringBuilder();
    }

    /**
     * Returns the channel built by the most recent parse.
     *
     * @return the parsed channel, or {@code null} if no {@code <channel>}
     *         element has been encountered yet
     */
    public Channel getChannel() {
        return this.channel;
    }

    /**
     * Downloads the feed and parses it, populating the channel returned by
     * {@link #getChannel()}. The network stream is always closed, even when
     * parsing fails.
     *
     * @throws ParserConfigurationException if no SAX parser can be created
     * @throws SAXException if the document is not well-formed XML
     * @throws IOException if the URL is malformed or the feed cannot be read
     */
    public void parse() throws ParserConfigurationException, SAXException,
            IOException {
        // Start from a clean slate so an earlier aborted parse cannot leak state.
        this.item = null;
        this.imgStatus = false;
        this.text.setLength(0);

        SAXParserFactory spf = SAXParserFactory.newInstance();
        disableExternalEntities(spf);
        SAXParser sp = spf.newSAXParser();

        InputStream urlInputStream = new URL(this.urlString).openConnection().getInputStream();
        try {
            sp.parse(urlInputStream, this);
        } finally {
            urlInputStream.close();
        }
    }

    /**
     * Handles an opening tag: creates the channel, starts a new item, or marks
     * the start of the channel image.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) {
        String name = elementName(localName, qName);

        // Text seen before this tag belongs to the parent, not to this element.
        this.text.setLength(0);

        if (name.equalsIgnoreCase("channel")) {
            this.channel = new Channel();
        } else if (this.channel != null) {
            if (name.equalsIgnoreCase("item")) {
                this.item = new Item();
                this.channel.addItem(this.item);
            } else if (name.equalsIgnoreCase("image")) {
                this.imgStatus = true;
            }
        }
    }

    /**
     * Handles a closing tag: stores the text collected for the element on the
     * current item or on the channel, depending on where the element appeared.
     * Elements seen before any {@code <channel>} are ignored.
     */
    @Override
    public void endElement(String uri, String localName, String qName) {
        if (this.channel != null) {
            String name = elementName(localName, qName);
            String content = this.text.toString().trim();

            if (name.equalsIgnoreCase("item")) {
                this.item = null;
            } else if (name.equalsIgnoreCase("image")) {
                this.imgStatus = false;
            } else if (name.equalsIgnoreCase("title")) {
                if (this.item != null) {
                    this.item.setTitle(content);
                } else if (!this.imgStatus) {
                    // The <title> inside <image> must not overwrite the channel name.
                    this.channel.setName(content);
                }
            } else if (name.equalsIgnoreCase("link")) {
                if (this.item != null) {
                    this.item.setUrl(content);
                } else if (!this.imgStatus) {
                    // The <link> inside <image> is not the channel's own link.
                    this.channel.setUrl(content);
                }
            } else if (name.equalsIgnoreCase("url")) {
                if (this.imgStatus) {
                    this.channel.setLogoUrl(content);
                }
            } else if (name.equalsIgnoreCase("description")) {
                // Only item descriptions are stored; the channel description is ignored.
                if (this.item != null) {
                    this.item.setDescription(content);
                }
            } else if (name.equalsIgnoreCase("pubDate")) {
                // Only item publish dates are stored.
                if (this.item != null) {
                    this.item.setPublishDate(getDateTime(content));
                }
            }
        }

        this.text.setLength(0);
    }

    /** Accumulates character data; the parser may deliver one element's text in several chunks. */
    @Override
    public void characters(char[] ch, int start, int length) {
        this.text.append(ch, start, length);
    }

    /**
     * Parses an RSS {@code pubDate} value using {@link #RSS_FORMAT_DATE}.
     * English day and month names are always expected, regardless of the
     * platform's default locale.
     *
     * @param value the date text, e.g. {@code "Tue, 10 Jun 2003 12:30:00 +0000"}
     * @return the parsed date, or the current time when {@code value} is
     *         {@code null} or does not match the expected format
     */
    public static Date getDateTime(String value) {
        if (value != null) {
            // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
            SimpleDateFormat formatter = new SimpleDateFormat(RSS_FORMAT_DATE, Locale.US);
            try {
                return formatter.parse(value);
            } catch (ParseException ignored) {
                // Deliberate best effort: an unreadable date falls back to "now" below.
            }
        }
        return Calendar.getInstance().getTime();
    }

    /**
     * Picks the element name to match against. Parsers that are not
     * namespace-aware report an empty local name and put the tag name in
     * {@code qName} instead.
     */
    private static String elementName(String localName, String qName) {
        if (localName == null || localName.length() == 0) {
            return qName == null ? "" : qName;
        }
        return localName;
    }

    /**
     * Turns off external entity and external DTD loading so that a remote feed
     * cannot make the parser read local files or fetch other URLs. Features a
     * given parser implementation does not know are skipped.
     */
    private static void disableExternalEntities(SAXParserFactory factory) {
        for (int i = 0; i < EXTERNAL_ENTITY_FEATURES.length; i++) {
            try {
                factory.setFeature(EXTERNAL_ENTITY_FEATURES[i], false);
            } catch (ParserConfigurationException ignored) {
                // Feature not supported by this parser; keep its default behaviour.
            } catch (SAXException ignored) {
                // Feature not recognised by this parser; keep its default behaviour.
            }
        }
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
SAXParser本身不会专门保留或丢弃空行,它只是按照XML文档的结构解析XML内容。如果XML文档中有空行,SAXParser会把它们当作普通的空白字符,通过characters()(在使用DTD校验时也可能通过ignorableWhitespace())回调报告给处理器。

如果你想在解析XML文档时保留空行,可以在characters回调中把收到的文本(包括空白字符)原样追加到缓冲区,并在处理startElement和endElement事件时添加一些逻辑来判断当前元素是否是文本元素,再将内容输出到控制台或缓冲区。

以下是一个简单的示例代码,可以在解析XML文档时保留空行。

```java
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

public class MyHandler extends DefaultHandler {
    private StringBuilder buffer;

    @Override
    public void startDocument() throws SAXException {
        buffer = new StringBuilder();
    }

    @Override
    public void endDocument() throws SAXException {
        System.out.println(buffer.toString());
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        // Check whether the current element is a text element
        if (qName.equalsIgnoreCase("text")) {
            buffer.append("\n"); // add a blank line
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        buffer.append(new String(ch, start, length));
    }
}
```

在上面的代码中,我们在startElement方法中判断当前元素是否是文本元素,并在buffer中添加一个空行。在characters方法中,我们将收到的字符数据(包括文档中原有的空白和换行)添加到buffer中。最后,在endDocument方法中,我们输出buffer中保存的XML内容,包括空行和其他文本内容。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值