不磨叽,直接上代码。
maven依赖引入
<!-- dom4j XML library. NOTE(review): presumably the source of the SAXReader / Document /
     Element / Attribute types used by parseXml; confirm that this version is still
     maintained and patched before adopting it. -->
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>1.6.1</version>
</dependency>
定义对象
/**
 * One entry ({@code <item>}) of an RSS feed as filled in by {@code parseXml}.
 *
 * <p>All values are stored as the raw strings found in the feed; nothing is parsed or
 * validated. Accessors are not declared here; they are presumably generated by
 * Lombok's {@code @Data}.
 */
@Data
public class RssItem {
/** Text of the item's {@code <title>} element. */
private String title;
/** Text of the item's {@code <link>} element. */
private String link;
/** Text of the item's {@code <description>} element. */
private String describe;
/** Value of the {@code href} attribute of the item's {@code <image>} element. */
private String thumb;
/** Value of the {@code url} attribute of the item's {@code <enclosure>} element. */
private String data;
/** Value of the enclosure's {@code length} attribute; only set when the enclosure also has a {@code url}. */
private String dataLength;
/** Text of the item's {@code <pubDate>} element, kept as an unparsed string. */
private String pubDate;
/** Text of the item's {@code <guid>} element. */
private String guid;
/** Text of the item's {@code <creator>} element. */
private String author;
}
/**
 * A parsed RSS feed: feed-level metadata plus its list of {@link RssItem}s.
 *
 * <p>Accessors are not declared here; they are presumably generated by Lombok's
 * {@code @Data}.
 */
@Data
public class RssXml {
/** Not assigned by {@code parseXml}. NOTE(review): presumably a persistence identifier; confirm against the caller. */
private int id;
/** Feed type; {@code parseXml} compares it with {@code "ximalaya"} but never assigns it. */
private String type;
/** Text of the {@code <title>} element found under the document root's child element. */
private String title;
/** Text of the feed-level {@code <author>} element; {@code parseXml} only fills it when {@link #type} is {@code "ximalaya"}. */
private String author;
/** The feed URL that was passed to {@code parseXml}. */
private String url;
/** Not assigned by {@code parseXml}. NOTE(review): presumably a feed cover image; confirm against the caller. */
private String image;
/** Items read from the feed; {@code parseXml} keeps at most the first 100. */
private List<RssItem> items;
}
解析方法
/**
 * Downloads the RSS feed at {@code url} and maps it onto an {@link RssXml}.
 *
 * <p>Every child element of the document root is processed in order
 * (NOTE(review): presumably the single {@code <channel>} of an RSS 2.0 document).
 * Its {@code <title>} becomes the feed title and at most the first 100 of its
 * {@code <item>} children are converted to {@link RssItem}s. If the root has several
 * children, a later child overwrites the title and item list set by an earlier one.
 * Elements and attributes that are missing from the feed leave the corresponding
 * property {@code null} instead of failing.
 *
 * @param url address of the RSS feed
 * @return the parsed feed, with {@code url} stored on it
 * @throws YamiShopBindException if the feed cannot be downloaded or parsed
 */
public RssXml parseXml(String url) {
    RssXml rssXml = new RssXml();
    rssXml.setUrl(url);

    Document document = readDocument(url);
    Iterator<?> children = document.getRootElement().elementIterator();
    while (children.hasNext()) {
        Element channel = (Element) children.next();

        Element title = channel.element("title");
        if (title != null) {
            rssXml.setTitle(title.getStringValue());
        }

        // Business-specific rule kept from the original author.
        // NOTE(review): rssXml is created in this method and its type is never assigned
        // here, so as written this condition is always false; confirm where the type is
        // supposed to come from.
        if (StringUtils.equals(rssXml.getType(), "ximalaya")) {
            Element author = channel.element("author");
            if (author != null) {
                rssXml.setAuthor(author.getStringValue());
            }
        }

        rssXml.setItems(readItems(channel));
    }
    return rssXml;
}

/**
 * Fetches and parses the feed, first directly and then, on any failure, through
 * {@code RSSUtil.getInputStream}, which sends browser-like request headers for
 * sources that reject the plain request.
 */
private Document readDocument(String url) {
    SAXReader reader = new SAXReader();
    // Force the document encoding.
    reader.setEncoding("UTF-8");
    try {
        // The feed is untrusted remote input: keep the parser from resolving external
        // entities or fetching external DTDs (XXE).
        reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    } catch (Exception e) {
        // Fail closed rather than parse untrusted XML with an unhardened parser.
        throw new YamiShopBindException("解析RSS失败");
    }

    try {
        return reader.read(new URL(url));
    } catch (Exception ignored) {
        // Deliberately ignored: the direct read is only the first attempt.
        try (InputStream inputStream = RSSUtil.getInputStream(url)) {
            return reader.read(inputStream);
        } catch (Exception ex) {
            // NOTE(review): the cause is not attached because the constructors of
            // YamiShopBindException are not visible here; pass ex along if a
            // (String, Throwable) constructor exists.
            throw new YamiShopBindException("解析RSS失败");
        }
    }
}

/** Converts at most the first 100 {@code <item>} children of {@code channel}. */
@SuppressWarnings("unchecked") // older dom4j versions return a raw List from elements(String)
private List<RssItem> readItems(Element channel) {
    List<Element> itemElements = channel.elements("item");
    int limit = Math.min(itemElements.size(), 100);
    List<RssItem> items = new ArrayList<>(limit);
    for (int i = 0; i < limit; i++) {
        items.add(toRssItem(itemElements.get(i)));
    }
    return items;
}

/** Maps one {@code <item>} element onto an {@link RssItem}; absent parts stay {@code null}. */
private RssItem toRssItem(Element itemElement) {
    RssItem rssItem = new RssItem();
    rssItem.setTitle(textOf(itemElement.element("title")));
    rssItem.setLink(textOf(itemElement.element("link")));
    rssItem.setGuid(textOf(itemElement.element("guid")));
    rssItem.setAuthor(textOf(itemElement.element("creator")));
    rssItem.setPubDate(textOf(itemElement.element("pubDate")));
    rssItem.setDescribe(textOf(itemElement.element("description")));

    Element enclosure = itemElement.element("enclosure");
    if (enclosure != null) {
        Attribute enclosureUrl = enclosure.attribute("url");
        // The length is only recorded together with a url, as before.
        if (enclosureUrl != null) {
            rssItem.setData(enclosureUrl.getStringValue());
            Attribute length = enclosure.attribute("length");
            if (length != null) {
                rssItem.setDataLength(length.getStringValue());
            }
        }
    }

    Element image = itemElement.element("image");
    if (image != null) {
        Attribute href = image.attribute("href");
        if (href != null) {
            rssItem.setThumb(href.getStringValue());
        }
    }
    return rssItem;
}

/** Returns the string value of {@code element}, or {@code null} when the element is absent. */
private String textOf(Element element) {
    return element == null ? null : element.getStringValue();
}
package com.yami.shop.common.util;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
/**
 * HTTP helpers for downloading RSS feeds with browser-like request headers, for feed
 * sources that reject requests that do not look like they come from a browser.
 *
 * @author JiYanLong
 * @date 2023/12/22 17:25
 */
public class RSSUtil {

    /**
     * Downloads {@code url} with browser-like headers and returns the response body.
     *
     * <p>The body is read completely into memory before this method returns, and the
     * HTTP response, client and connection manager are released here. The returned
     * stream therefore holds no network resources, even if the caller never closes it.
     *
     * @param url address to fetch
     * @return an in-memory stream over the complete response body
     * @throws IOException if the request fails, the server answers with a non-2xx
     *     status, or the response has no body
     */
    public static InputStream getInputStream(String url) throws IOException {
        DefaultHttpClient client = new DefaultHttpClient(new PoolingClientConnectionManager());
        try {
            HttpGet get = new HttpGet(url);
            get.setHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
            get.setHeader("Sec-Ch-Ua", "\"Not_A Brand\";v=\"8\", \"Chromium\";v=\"120\", \"Google Chrome\";v=\"120\"");
            get.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");

            try (CloseableHttpResponse response = client.execute(get)) {
                int status = response.getStatusLine().getStatusCode();
                if (status < 200 || status >= 300) {
                    // Do not hand an error page to the caller as if it were the feed.
                    throw new IOException("Unexpected HTTP status " + status + " for " + url);
                }
                if (response.getEntity() == null) {
                    throw new IOException("Empty HTTP response body for " + url);
                }
                try (InputStream body = response.getEntity().getContent()) {
                    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                    byte[] chunk = new byte[8192];
                    int read;
                    while ((read = body.read(chunk)) != -1) {
                        buffer.write(chunk, 0, read);
                    }
                    return new ByteArrayInputStream(buffer.toByteArray());
                }
            }
        } finally {
            // A client and connection pool are created per call, so release them per call.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Downloads {@code url} and returns the response body as text.
     *
     * <p>The body is decoded as UTF-8 and returned unchanged, including its line
     * terminators, so that markup spanning several lines is not glued together.
     *
     * @param url address to fetch
     * @return the response body decoded as UTF-8
     * @throws IOException if the download fails
     */
    public static String parseXML(String url) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(getInputStream(url), StandardCharsets.UTF_8))) {
            StringBuilder text = new StringBuilder();
            char[] chunk = new char[4096];
            int read;
            while ((read = reader.read(chunk)) != -1) {
                text.append(chunk, 0, read);
            }
            return text.toString();
        }
    }
}
以上就是全部代码,重点在 parseXml 这个方法。
下面是我用上面代码做的小程序,订阅RSS用的,希望对你有帮助。