package com.ivanlovetracy.rssreader;
import java.io.InputStream;
import java.net.URL;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import android.text.Html;
/**
 * SAX-based RSS reader: downloads a feed and converts every {@code <item>}
 * element into an {@link RssItem} collected in an {@link RssFeed}.
 */
public class RssParser {

    /**
     * Downloads the document at {@code xmlSource} and parses it into a feed.
     *
     * @param xmlSource URL of the feed, in any form accepted by {@link URL}
     * @return the feed populated by the handler, one entry per {@code <item>} element
     * @throws Exception if the URL is malformed, the stream cannot be opened or
     *                   read, or the document is not well-formed XML
     */
    public static RssFeed rssParse(String xmlSource) throws Exception {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        // The handler matches on local names, which are only guaranteed to be
        // reported when namespace processing is switched on.
        factory.setNamespaceAware(true);
        SAXParser parser = factory.newSAXParser();
        RssHandler handler = new RssHandler();

        InputStream is = new URL(xmlSource).openStream();
        try {
            parser.parse(is, handler);
        } finally {
            // Release the connection even when parsing fails.
            is.close();
        }
        return handler.getFeed();
    }

    /**
     * Content handler that turns SAX events into {@link RssItem} objects.
     *
     * <p>The text of a field is accumulated in a buffer and only stored when the
     * element closes. SAX may report the content of a single element (including
     * CDATA sections) through several {@code characters()} calls, so reading just
     * one call can yield a truncated value.
     */
    private static class RssHandler extends DefaultHandler {
        private static final int RSS_NONE = 0;
        private static final int RSS_TITLE = 1;
        private static final int RSS_LINK = 2;
        private static final int RSS_DESCRIPTION = 3;
        private static final int RSS_CATEGORY = 4;
        private static final int RSS_PUBDATE = 5;

        /** Text collected for the field element that is currently open. */
        private final StringBuilder text = new StringBuilder();
        /** Field whose text is being collected, or {@code RSS_NONE}. */
        private int state = RSS_NONE;
        /** True between the start and the end of an {@code <item>} element. */
        private boolean insideItem;
        private RssFeed rssFeed;
        private RssItem rssItem;

        /**
         * @return the feed built so far; {@code null} until parsing has started
         */
        public RssFeed getFeed() {
            return rssFeed;
        }

        @Override
        public void startDocument() throws SAXException {
            rssFeed = new RssFeed();
            rssItem = null;
            insideItem = false;
            state = RSS_NONE;
            text.setLength(0);
        }

        @Override
        public void endDocument() throws SAXException {
        }

        @Override
        public void startElement(String uri, String localName, String qName,
                Attributes attributes) throws SAXException {
            String name = elementName(localName, qName);
            if ("item".equals(name)) {
                rssItem = new RssItem();
                insideItem = true;
                state = RSS_NONE;
                return;
            }
            // Elements such as the channel's own <title> share names with item
            // fields; they are ignored unless an item is open.
            state = insideItem ? fieldFor(name) : RSS_NONE;
            if (state != RSS_NONE) {
                text.setLength(0);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            String name = elementName(localName, qName);
            if ("item".equals(name)) {
                if (insideItem) {
                    rssFeed.addItem(rssItem);
                    insideItem = false;
                }
                state = RSS_NONE;
                return;
            }

            int field = fieldFor(name);
            if (!insideItem || field == RSS_NONE) {
                return;
            }

            // Convert the complete text in one go so that markup or entities
            // split across several characters() calls are handled as a whole.
            String value = Html.fromHtml(text.toString()).toString();
            switch (field) {
            case RSS_TITLE:
                rssItem.setTitle(value);
                break;
            case RSS_LINK:
                rssItem.setLink(value);
                break;
            case RSS_DESCRIPTION:
                rssItem.setDescription(value);
                break;
            case RSS_CATEGORY:
                rssItem.setCategory(value);
                break;
            case RSS_PUBDATE:
                rssItem.setPubdate(value);
                break;
            default:
                break;
            }
            state = RSS_NONE;
        }

        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            if (state != RSS_NONE) {
                text.append(ch, start, length);
            }
        }

        /** Returns the local name, falling back to the qualified name when it is empty. */
        private static String elementName(String localName, String qName) {
            return (localName != null && localName.length() > 0) ? localName : qName;
        }

        /** Maps an element name to its field constant, or {@code RSS_NONE} if it is not a field. */
        private static int fieldFor(String name) {
            if ("title".equals(name)) {
                return RSS_TITLE;
            }
            if ("link".equals(name)) {
                return RSS_LINK;
            }
            if ("description".equals(name)) {
                return RSS_DESCRIPTION;
            }
            if ("category".equals(name)) {
                return RSS_CATEGORY;
            }
            if ("pubDate".equals(name)) {
                return RSS_PUBDATE;
            }
            return RSS_NONE;
        }
    }
}
package com.ivanlovetracy.rssreader;
import java.io.InputStream;
import java.net.URL;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserFactory;
import android.text.Html;
/**
 * Pull-parser based RSS reader: downloads a feed and converts every
 * {@code <item>} element into an {@link RssItem} collected in an {@link RssFeed}.
 *
 * <p>An instance keeps its parsing state in fields, so a single instance must
 * not run {@link #rssParse(String)} from several threads at once.
 */
public class RssPullParser {
    private static final int NONE = 0;
    private static final int TITLE = 1;
    private static final int LINK = 2;
    private static final int DESCRIPTION = 3;
    private static final int CATEGORY = 4;
    private static final int PUBDATE = 5;

    /** Field waiting for its text, or {@code NONE}. */
    private int state = NONE;
    /** True between the start and the end of an {@code <item>} element. */
    private boolean isItemTag = false;
    private RssFeed rssFeed;
    private RssItem rssItem;

    /**
     * Downloads the document at {@code url} and parses it into a feed.
     *
     * @param url URL of the feed, in any form accepted by {@link URL}
     * @return the parsed feed, one entry per {@code <item>} element
     * @throws Exception if the URL is malformed, the stream cannot be opened or
     *                   read, or the document cannot be parsed
     */
    public RssFeed rssParse(String url) throws Exception {
        XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
        factory.setNamespaceAware(true);
        XmlPullParser parser = factory.newPullParser();

        // Start from a clean slate so nothing leaks in from an earlier call,
        // including one that was aborted by an exception in the middle of an item.
        state = NONE;
        isItemTag = false;
        rssItem = null;
        rssFeed = new RssFeed();

        InputStream is = new URL(url).openStream();
        try {
            // NOTE(review): the encoding is forced to UTF-8, so a feed served in
            // another encoding is decoded incorrectly. Kept as-is to preserve
            // the existing behaviour.
            parser.setInput(is, "UTF-8");
            for (int eventType = parser.getEventType();
                    eventType != XmlPullParser.END_DOCUMENT;
                    eventType = parser.next()) {
                switch (eventType) {
                case XmlPullParser.START_TAG:
                    onStartTag(parser.getName());
                    break;
                case XmlPullParser.END_TAG:
                    onEndTag(parser.getName());
                    break;
                case XmlPullParser.TEXT:
                    onText(parser.getText());
                    break;
                default:
                    break;
                }
            }
        } finally {
            // Release the connection even when parsing fails.
            is.close();
        }
        return rssFeed;
    }

    /** Opens a new item, or arms the field named by the tag while an item is open. */
    private void onStartTag(String tagName) {
        if ("item".equals(tagName)) {
            rssItem = new RssItem();
            isItemTag = true;
            state = NONE;
            return;
        }
        if (!isItemTag) {
            // Channel-level <title>, <link>, ... must not arm a field: the
            // stale state would otherwise be applied to text inside the next item.
            return;
        }
        int field = fieldFor(tagName);
        if (field != NONE) {
            state = field;
        }
    }

    /** Closes the current item, or finishes a field element that had no text. */
    private void onEndTag(String tagName) {
        if (!isItemTag) {
            return;
        }
        if ("item".equals(tagName)) {
            rssFeed.addItem(rssItem);
            // Leaving the item: later channel-level text must not touch it.
            isItemTag = false;
            state = NONE;
            return;
        }
        if (state != NONE && fieldFor(tagName) == state) {
            // The field element closed without any text. Store an empty value
            // instead of leaving the field armed, which would file the next
            // unrelated text under this field.
            store(state, "");
            state = NONE;
        }
    }

    /** Stores the text under the armed field, if any, while an item is open. */
    private void onText(String rawText) {
        if (!isItemTag || state == NONE) {
            return;
        }
        String tagText = Html.fromHtml(rawText).toString().trim();
        store(state, tagText);
        state = NONE;
    }

    /** Writes {@code value} into the property of the current item selected by {@code field}. */
    private void store(int field, String value) {
        switch (field) {
        case TITLE:
            rssItem.setTitle(value);
            break;
        case LINK:
            rssItem.setLink(value);
            break;
        case DESCRIPTION:
            rssItem.setDescription(value);
            break;
        case CATEGORY:
            rssItem.setCategory(value);
            break;
        case PUBDATE:
            rssItem.setPubdate(value);
            break;
        default:
            break;
        }
    }

    /** Maps a tag name to its field constant, or {@code NONE} if it is not a field. */
    private static int fieldFor(String tagName) {
        if ("title".equals(tagName)) {
            return TITLE;
        }
        if ("link".equals(tagName)) {
            return LINK;
        }
        if ("description".equals(tagName)) {
            return DESCRIPTION;
        }
        if ("category".equals(tagName)) {
            return CATEGORY;
        }
        if ("pubDate".equals(tagName)) {
            return PUBDATE;
        }
        return NONE;
    }
}