public class RssParser {
private int currentstate = -1;
private boolean isItemTAG = false;
private RssItem rssItem;
private RssFeed rssFeed;
final int TITLE = 1;
final int LINK = 2;
final int DESCRIPTION = 3;
final int PUBDATE = 4;
public void ParserRss(String url) throws Exception {
XmlPullParserFactory xmlPullParserFactory = XmlPullParserFactory.newInstance();
xmlPullParserFactory.setNamespaceAware(true);
XmlPullParser xmlPullParser = xmlPullParserFactory.newPullParser();
URL urlAddress = new URL(url);
xmlPullParser.setInput(urlAddress.openStream(), "gbk");
int eventType = xmlPullParser.getEventType();
while (eventType != xmlPullParser.END_DOCUMENT) {
if (eventType == xmlPullParser.START_DOCUMENT) {
System.out.println("start Document...");
rssFeed = new RssFeed();
} else if (eventType == xmlPullParser.END_DOCUMENT) {
System.out.println("end Document...");
} else if (eventType == xmlPullParser.START_TAG) {
if (xmlPullParser.getName().equals("item")) {
rssItem = new RssItem();
isItemTAG = true;
}
if (xmlPullParser.getName().equals("title")) {
currentstate = TITLE;
}
if (xmlPullParser.getName().equals("link")) {
currentstate = LINK;
}
if (xmlPullParser.getName().equals("description")) {
currentstate = DESCRIPTION;
}
if (xmlPullParser.getName().equals("pubDate")) {
currentstate = PUBDATE;
}
} else if (eventType == xmlPullParser.END_TAG) {
if (xmlPullParser.getName().equals("item")) {
rssFeed.addItem(rssItem);
}
} else if (eventType == xmlPullParser.TEXT) {
if (isItemTAG) {
switch (currentstate) {
case TITLE:
rssItem.setTitle(clearSpecialChar(xmlPullParser.getText()));
currentstate = -1;
break;
case LINK:
rssItem.setLink(clearSpecialChar(xmlPullParser.getText()));
currentstate = -1;
break;
case DESCRIPTION:
rssItem.setDescription(clearSpecialChar(xmlPullParser.getText()));
currentstate = -1;
break;
case PUBDATE:
rssItem.setPubData(clearSpecialChar(xmlPullParser.getText()));
currentstate = -1;
break;
default:
break;
}
}
}
eventType = xmlPullParser.next();
}
}
public RssFeed getFeed(){
return rssFeed;
}
private String clearSpecialChar(String s){
Pattern pattern = Pattern.compile("\\s|\\r|\\n|\\t");
Matcher matcher = pattern.matcher(s);
return matcher.replaceAll("").trim();
}
}
// A DOM parsing class
// Latest recommended article published on 2021-05-26 02:19:33