通过 Action 调用 SAX2 解析器来读取 http://...xml 的信息（SAX2）

本文链接：https://blog.csdn.net/yuan8080/article/details/7032068

和上一篇很类似，只是采用了不同的方法：

首先，为了能通过前台 action 调用，写了三个类：FeaturedBlogAction.java；RssReader.java，继承 DefaultHandler（DefaultHandler 实现了 EntityResolver、DTDHandler、ContentHandler、ErrorHandler 四个接口）；BlogCrainInfo.java，继承 RssItemDTO。

package com.hiredmyway.action.crain;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;

import com.hiredmyway.action.DefaultAction;
import com.hiredmyway.dto.BlogCrainInfo;
import com.hiredmyway.dto.common.SelectDTO;
import com.hiredmyway.service.OrgService;
import com.hiredmyway.service.ServiceFactory;
import com.hiredmyway.util.RssReader;

public class FeaturedBlogAction extends DefaultAction {

   /**
   *
   */
   private static final long serialVersionUID = 1L;
   private Logger logger = Logger.getLogger(FeaturedBlogAction.class);
   private String rssURL;
   private OrgService orgService;
   private String number;// 从前台传得参数就是有几个要显示在页面上比如有8条记录

   public String getRssURL() {
       return rssURL;
   }
   public void setRssURL(String rssURL) {
       this.rssURL = rssURL;
   }
   public String getNumber() {
       return number;
   }
   public void setNumber(String number) {
       this.number = number;
   }

   @Override
   public String execute() throws Exception {
       logger.info(super.getRemoteInfo()+"get blog items from site:"+rssURL);
       RssReader MyRSSParser = new RssReader(getRssURL(),Integer.valueOf(getNumber()),new BlogCrainInfo());
       String results="no results found";
       try {
           results = MyRSSParser.parse();
       } catch (Exception e) {
           logger.error(super.getRemoteInfo()+"get blog items error"+e.getMessage());
           results = e.getMessage();
       }
       response.setContentType("text/html");
       response.setCharacterEncoding("UTF-8");
       response.getWriter().write(results);
       return null;
   }


}

==============================================

package com.hiredmyway.util;

import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import com.hiredmyway.dto.BlogCrainInfo;//这个类是我本人做的
import com.hiredmyway.dto.BlogInfo;
import com.hiredmyway.dto.EmploymentItem;
import com.hiredmyway.dto.RssItemDTO;

/**
 * SAX handler that downloads an RSS feed and renders its {@code <item>} elements as HTML.
 *
 * <p>Each {@code <item>} is collected into a fresh DTO of the same kind as the prototype passed
 * to the constructor; {@link #parse()} then concatenates the DTOs'
 * {@code toString(index, total)} output.
 */
public class RssReader extends DefaultHandler {
    /**
     * Maximum number of closing tags processed once the first item has started; parsing is
     * aborted beyond that so that giant files cannot tie up the server.
     */
    private static final int MAX_ELEMENTS = 500;
    /** Maximum title length kept for blog items. */
    private static final int MAX_TITLE_LENGTH = 50;
    /** Matches one complete {@code <img ...>} tag. */
    private static final Pattern IMG_TAG =
            Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);
    /** Captures the value of a {@code src} attribute, with or without quotes. */
    private static final Pattern SRC_ATTRIBUTE =
            Pattern.compile("\\ssrc\\s*=\\s*[\"']?([^\"'\\s>]+)", Pattern.CASE_INSENSITIVE);

    // How many items should be loaded before stopping.
    private int maximumResults = 6;
    // Becomes true once the first "item" element has been seen.
    private boolean insideItems = false;
    // Number of closing tags processed since the first item started.
    private int elementCount = 0;
    // Number of items collected so far.
    private int resultCount = 0;
    // Feed address to parse.
    private String url = "http://blog.hiredmyway.com/feed";
    // Character data of the element currently being read.
    private final StringBuilder currentText = new StringBuilder();
    // Item currently being filled, or null when none is pending.
    private RssItemDTO currentItem = null;
    // Prototype whose runtime type selects which DTO is created for each item.
    private RssItemDTO rssItem = null;
    // Items collected by the current parse, in document order.
    private final List<RssItemDTO> items = new ArrayList<RssItemDTO>();
    // Not read or written anywhere inside this class.
    boolean dSet = false;
    // BLOGSITEM or EMPLOYMENTITEM; not read anywhere inside this class.
    private String type = "BLOGSITEM";

    /**
     * @param url            the feed source
     * @param maximumResults the maximum number of items to collect
     * @param rssItem        prototype DTO; its runtime type decides which DTO class is
     *                       instantiated for every item
     */
    public RssReader(String url, int maximumResults, RssItemDTO rssItem) {
        super();
        this.url = url;
        this.maximumResults = maximumResults;
        this.rssItem = rssItem;
    }

    /**
     * Downloads and parses the feed and returns the HTML rendering of the collected items.
     *
     * <p>A failure after at least some output was produced is ignored, which is how the
     * deliberate "limit reached" aborts end a parse. A failure that leaves no output is rethrown.
     *
     * @return the concatenated {@code toString(index, total)} output of all collected items
     * @throws Exception the download or parse failure, when no output could be produced
     */
    public synchronized String parse() throws Exception {
        // Start from a clean slate so a second call does not repeat earlier results.
        resetState();
        Exception failure = null;
        try {
            XMLReader reader = XMLReaderFactory.createXMLReader();
            disableExternalEntities(reader);
            reader.setContentHandler(this);
            reader.setErrorHandler(this);
            URLConnection connection = new URL(url).openConnection();
            /*
             * If we don't set the user-agent property sites like Google won't
             * let you access their feeds.
             */
            connection.setRequestProperty("User-agent", "www.hiredmyway.com");
            // Hand the parser the raw bytes so it applies the encoding declared by the
            // document rather than the platform default charset.
            InputSource source = new InputSource(connection.getInputStream());
            try {
                reader.parse(source);
            } finally {
                source.getByteStream().close();
            }
        } catch (Exception e) {
            failure = e;
        }
        // Output all the parsed items as HTML.
        StringBuilder html = new StringBuilder();
        int total = items.size();
        for (int i = 0; i < total; i++) {
            html.append(items.get(i).toString(i, total));
        }
        if (html.length() == 0 && failure != null) {
            throw failure;
        }
        return html.toString();
    }

    // Event handlers.

    /** Called when the XML document begins; nothing to do. */
    @Override
    public void startDocument() {
    }

    /** Called at the end of the document: stores the item still being filled, if any. */
    @Override
    public void endDocument() {
        flushCurrentItem();
    }

    /**
     * Called for every opening tag. An {@code item} tag stores the previous item and starts a
     * new one, or aborts the parse once {@code maximumResults} items have been collected.
     */
    @Override
    public void startElement(String uri, String name, String qName,
            Attributes atts) throws SAXException {
        // Drop whitespace that preceded this tag so it does not end up in the element's value.
        currentText.setLength(0);
        // qName contains the non-URI name of the XML element.
        if (!qName.equals("item")) {
            return;
        }
        insideItems = true;
        flushCurrentItem();
        // Checking before a new item is created means the last item stored by
        // endDocument() can never exceed the limit.
        if (resultCount >= maximumResults) {
            throw new SAXException("\nLimit reached.");
        }
        currentItem = newItem();
    }

    /**
     * Called for every closing tag: copies the collected text into the matching property of
     * the current item and enforces {@link #MAX_ELEMENTS}.
     */
    @Override
    public void endElement(String uri, String name, String qName)
            throws SAXException {
        String text = currentText.toString();
        currentText.setLength(0);
        if (!insideItems) {
            // Channel-level elements before the first item are ignored and not counted.
            return;
        }
        if (currentItem != null) {
            applyField(qName, text);
        }
        // Make sure we don't attempt to parse too long of a document.
        elementCount++;
        if (elementCount > MAX_ELEMENTS) {
            throw new SAXException("\nLimit reached");
        }
    }

    /** Collects character data of the element currently being read. */
    @Override
    public void characters(char ch[], int start, int length) {
        currentText.append(ch, start, length);
    }

    /** Clears everything a previous parse may have left behind. */
    private void resetState() {
        items.clear();
        resultCount = 0;
        elementCount = 0;
        currentItem = null;
        insideItems = false;
        currentText.setLength(0);
    }

    /** Best-effort switch-off of external entity resolution, since the feed is remote content. */
    private static void disableExternalEntities(XMLReader reader) {
        String[] features = {
            "http://xml.org/sax/features/external-general-entities",
            "http://xml.org/sax/features/external-parameter-entities"
        };
        for (String feature : features) {
            try {
                reader.setFeature(feature, false);
            } catch (SAXException ignored) {
                // The parser does not recognise or support the feature; use it as configured.
            }
        }
    }

    /** Moves the pending item, if any, into the result list. */
    private void flushCurrentItem() {
        if (currentItem != null) {
            resultCount++;
            items.add(currentItem);
            currentItem = null;
        }
    }

    /**
     * Creates an empty DTO of the same kind as the prototype. For a prototype of any other type
     * (or {@code null}) the prototype itself is returned, so all items then share one object.
     */
    private RssItemDTO newItem() {
        if (rssItem instanceof BlogInfo) {
            return new BlogInfo();
        }
        if (rssItem instanceof EmploymentItem) {
            return new EmploymentItem();
        }
        if (rssItem instanceof BlogCrainInfo) {
            return new BlogCrainInfo();
        }
        return rssItem;
    }

    /** Stores the text of a just-closed element in the matching property of the current item. */
    private void applyField(String qName, String text) {
        if (qName.equals("link")) {
            currentItem.setURL(text);
        } else if (qName.equals("description")) {
            currentItem.setDescription(text);
        }
        if (currentItem instanceof EmploymentItem) {
            if (qName.equals("title")) {
                currentItem.setTitle(text);
            } else if (qName.equals("pubDate")) {
                currentItem.setPubDate(SysUtil.convertDateStringToString(text));
            }
        } else if (currentItem instanceof BlogInfo) {
            if (qName.equals("title")) {
                currentItem.setTitle(SysUtil.subString(text, 0, MAX_TITLE_LENGTH));
            }
        } else if (currentItem instanceof BlogCrainInfo) {
            if (qName.equals("title")) {
                currentItem.setTitle(SysUtil.subString(text, 0, MAX_TITLE_LENGTH));
            } else if (qName.equals("content:encoded")) {
                // Only the first image of the post body is used.
                List<String> images = getImageUrl(text);
                if (!images.isEmpty()) {
                    currentItem.setImageUrl(images.get(0));
                }
            }
        }
    }

    /**
     * Extracts the {@code src} values of all {@code <img>} tags in the given markup using
     * regular expressions.
     *
     * @param content HTML fragment to scan
     * @return the image addresses in document order; empty when there is none
     */
    private List<String> getImageUrl(String content) {
        List<String> pics = new ArrayList<String>();
        Matcher tags = IMG_TAG.matcher(content);
        while (tags.find()) {
            Matcher src = SRC_ATTRIBUTE.matcher(tags.group());
            if (src.find()) {
                pics.add(src.group(1));
            }
        }
        return pics;
    }
}
=====================================================

package com.hiredmyway.dto;

import org.apache.commons.lang.StringUtils;

import com.hiredmyway.util.string.ShortTextSubString;
import com.hiredmyway.util.string.TitleSubString;

/**
 * Base data holder for one entry of an RSS feed. Subclasses override
 * {@link #toString(int, int)} to render the entry.
 */
public class RssItemDTO {
    private String title = "";
    private String url = "";
    private String description = "";
    private String pubDate = "";
    private String imageUrl = "";

    /**
     * Returns the description shortened by {@code ShortTextSubString}. A non-blank description
     * is shortened on access and the shortened text replaces the stored value.
     */
    public String getDescription() {
        if (StringUtils.isNotBlank(description)) {
            description = new ShortTextSubString().trimSubString(description);
        }
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    /**
     * Stores the title, passing a non-blank value through {@code TitleSubString}.
     *
     * <p>The trimming is applied to the incoming value. Applying it to the previously stored
     * title would have no effect, because that value is replaced by this call.
     *
     * @param title the title as read from the feed; blank values are stored unchanged
     */
    public void setTitle(String title) {
        if (StringUtils.isNotBlank(title)) {
            this.title = new TitleSubString().trimSubString(title);
        } else {
            this.title = title;
        }
    }

    public void setURL(String url) {
        this.url = url;
    }

    /**
     * Renders this entry. The base implementation renders nothing.
     *
     * @param currentItem index of this entry among the entries being rendered
     * @param itemAmount  total number of entries being rendered
     * @return an empty string
     */
    public String toString(int currentItem, int itemAmount) {
        return "";
    }

    public String getTitle() {
        return title;
    }

    public String getURL() {
        return url;
    }

    public String getPubDate() {
        return pubDate;
    }

    public void setPubDate(String pubDate) {
        this.pubDate = pubDate;
    }

    public String getImageUrl() {
        return imageUrl;
    }

    public void setImageUrl(String imageUrl) {
        this.imageUrl = imageUrl;
    }

}

==================================
/**
 * Feed entry rendered as a linked title followed by a thumbnail and the description.
 */
public class BlogCrainInfo extends RssItemDTO{

    /**
     * Renders this entry as the HTML fragment shown on the page, terminated by a {@code |}.
     *
     * <p>Title, link and image address are HTML-escaped because they are inserted into
     * attribute values and element text. The description is inserted unescaped.
     *
     * @param currentItem index of this entry; not used by this rendering
     * @param itemAmount  total number of entries; not used by this rendering
     * @return the HTML fragment for this entry
     */
    @Override
    public String toString(int currentItem,int itemAmount) {
        StringBuilder html = new StringBuilder();
        html.append("<div class=\"title_infor_5\"><a href=\"").append(escapeHtml(getURL()))
            .append("\" target=\"_parent\">").append(escapeHtml(getTitle())).append("</a></div>")
            .append("<div class=\"daily_blog_title\" > <img src=\"").append(escapeHtml(getImageUrl()))
            .append("\" width=\"72px\" height=\"67px\" /> <span>")
            // NOTE(review): left unescaped on the assumption that feed descriptions carry
            // markup meant to be rendered; confirm the shortened text is safe to emit as-is.
            .append(getDescription())
            .append("</span> </div>")
            .append("|");
        return html.toString();
    }

    /** Escapes the characters that are significant in HTML text and attribute values. */
    private static String escapeHtml(String value) {
        if (value == null) {
            return "";
        }
        StringBuilder escaped = new StringBuilder(value.length() + 16);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
}