RSS抽取相关工具类

package rss;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.List;

import com.sun.syndication.feed.synd.SyndCategory;
import com.sun.syndication.feed.synd.SyndContent;
import com.sun.syndication.feed.synd.SyndEnclosure;
import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.SyndFeedInput;
import com.sun.syndication.io.XmlReader;

/**
 * Utility class for parsing RSS feeds with the ROME library.
 * @author linwei
 *
 */
public class RssUtil {

	/**
	 * Parses the RSS feed located at the given URL.
	 * @param url address of the feed
	 * @return the parsed SyndFeed, or null when the URL is malformed, the
	 *         feed cannot be read, or it cannot be parsed (the stack trace
	 *         is printed in those cases)
	 */
	public static SyndFeed getRssBYURL(String url){
		SyndFeedInput input = new SyndFeedInput();
		XmlReader reader = null;
		try {
			reader = new XmlReader(new URL(url));
			return input.build(reader);
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (IllegalArgumentException e) {
			e.printStackTrace();
		} catch (FeedException e) {
			e.printStackTrace();
		} finally {
			// Release the underlying connection stream on every path.
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
		}
		return null;
	}
	
	/**
	 * Parses the RSS feed stored in the file at the given path.
	 * @param path file system path of the feed document
	 * @return the parsed SyndFeed, or null when the file cannot be read or
	 *         cannot be parsed (the stack trace is printed in those cases)
	 */
	public static SyndFeed getRssBYPath(String path){
		SyndFeedInput input = new SyndFeedInput();
		InputStream in = null;
		XmlReader reader = null;
		try {
			in = new FileInputStream(new File(path));
			reader = new XmlReader(in);
			return input.build(reader);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (IllegalArgumentException e) {
			e.printStackTrace();
		} catch (FeedException e) {
			e.printStackTrace();
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
			// Closed separately because the stream would otherwise leak
			// if the XmlReader constructor itself threw.
			if (in != null) {
				try {
					in.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
		}
		return null;
	}

	/**
	 * Demo entry point: parses a feed file and prints its fields.
	 * @param args optional; args[0] is the path of the feed file,
	 *             defaulting to "D:/rss.xml" when absent
	 */
	public static void main(String[] args) {
		String path = (args != null && args.length > 0) ? args[0] : "D:/rss.xml";
		SyndFeed feed=getRssBYPath(path);
		if (feed == null) {
			// getRssBYPath already printed the cause; avoid a NullPointerException here.
			System.err.println("Failed to parse RSS feed: " + path);
			return;
		}
		System.out.println("标题:"+feed.getTitle());
		System.out.println("发布时间:"+feed.getPublishedDate());//Fri, 22 Feb 2008 15:49:18 GMT
		System.out.println("语言:"+feed.getLanguage());
		System.out.println("feed.getUri():"+feed.getUri());
		System.out.println("feed.getEncoding():"+feed.getEncoding());
		System.out.println("feed.getLink():"+feed.getLink());
		List<SyndEntry> list = feed.getEntries();
		if (list != null && !list.isEmpty()) {
			for (SyndEntry entry : list) {
				printEntry(entry);
			}
		}
	}

	/**
	 * Prints the fields of a single feed entry to standard output.
	 * @param entry the entry to print
	 */
	private static void printEntry(SyndEntry entry) {
		// Title, link, description and dates are the basic parts of an RSS item.
		System.out.println("标题:" + entry.getTitle());
		System.out.println("连接地址:" + entry.getLink());
		SyndContent description = entry.getDescription();
		Date date =entry.getPublishedDate();
		Date update =entry.getUpdatedDate();
		System.out.println("发布时间:" + String.valueOf(date));
		System.out.println("更新时间:" + String.valueOf(update));

		SyndContent titleEx = entry.getTitleEx();
		String title=titleEx==null?"":titleEx.getValue();
		System.out.println("标题EX:" +title );
		// The following parts of an RSS item are optional.
		System.out.println("标题的作者:" + entry.getAuthor());
		System.out.println("链接:" + entry.getLink());

		String value=description==null?"":description.getValue();
		System.out.println("标题简介:" + value);

		// Contents: the elements are SyndContent objects, not Strings.
		List contentsList=entry.getContents();
		if (contentsList != null) {
			for (int m = 0; m < contentsList.size(); m++) {
				SyndContent content = (SyndContent) contentsList.get(m);
				String contents = content == null ? "" : content.getValue();
				System.out.println("得到内容:" + contents);
			}
		}

		// Links: the elements are link objects rather than Strings, so they
		// are printed through string conversion instead of being cast.
		List linkList=entry.getLinks();
		if (linkList != null) {
			for (int m = 0; m < linkList.size(); m++) {
				Object link = linkList.get(m);
				System.out.println("连接地址:" + m + link);
			}
		}

		// Categories this entry belongs to.
		List categoryList = entry.getCategories();
		if (categoryList != null) {
			for (int m = 0; m < categoryList.size(); m++) {
				SyndCategory category = (SyndCategory) categoryList.get(m);
				System.out.println("此标题所属的范畴:" + category.getName());
			}
		}

		// Enclosures (attached media files): print each one's URL rather
		// than the whole list once per element.
		List enclosureList = entry.getEnclosures();
		if (enclosureList != null) {
			for (int n = 0; n < enclosureList.size(); n++) {
				SyndEnclosure enclosure = (SyndEnclosure) enclosureList.get(n);
				System.out.println("流媒体播放文件:" + enclosure.getUrl());
			}
		}
	}
}

 

需要导入相对应的JAR包:rome-0.9.jar 以及 jdom.jar 两个相关包。(同时,请记住RSS返回内容的编码格式需要为UTF-8才能正常解析。

     eg:<?xml version="1.0" encoding="utf-8"?>)



  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值