Jsoup 获取微信公众号文章

jsoup下载链接:https://jsoup.org/download
jsoup中文文档:jsoup中文文档

import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Static helpers that download a WeChat official-account article page with jsoup
 * and extract individual pieces of information from it.
 *
 * <p>Every public extractor performs its own HTTP request for the given URL.
 * Unless a method states otherwise, it returns {@code null} when the markup it
 * looks for is not present in the downloaded page, instead of failing with an
 * unchecked exception.
 */
public class Clear {
	/** Timeout, in milliseconds, passed to jsoup for every page download. */
	private static final int TIMEOUT_MILLIS = 3000;

	/** Text searched for in the page source; the next quoted string after it is the cover URL. */
	private static final String COVER_URL_KEY = "msg_cdn_url";

	/** Text that identifies the inline script carrying the publish time. */
	private static final String PUBLISH_TIME_HINT = "document.getElementById(\"publish_time\")";

	/** Marker that immediately precedes the publish time inside that script. */
	private static final String PUBLISH_TIME_MARKER = "s=\"";

	/** Number of characters read after the marker (presumably a yyyy-MM-dd date; verify against a live page). */
	private static final int PUBLISH_TIME_LENGTH = 10;

	/**
	 * Gets the article's cover image URL.
	 *
	 * @param informationUrl URL of the article page
	 * @return the first double-quoted value following {@code msg_cdn_url} in the
	 *         page source, or {@code null} if the key or its quotes are missing
	 * @throws IOException if the page cannot be downloaded
	 */
	public static String getCoverUrl(String informationUrl) throws IOException {
		return coverUrlOf(fetch(informationUrl));
	}

	/**
	 * Gets the display name of the official account.
	 *
	 * @param informationUrl URL of the article page
	 * @return text of the element with id {@code js_name}, or {@code null} if absent
	 * @throws IOException if the page cannot be downloaded
	 */
	public static String getName(String informationUrl) throws IOException {
		return textOrNull(fetch(informationUrl).getElementById("js_name"));
	}

	/**
	 * Gets the article's publish time.
	 *
	 * @param informationUrl URL of the article page
	 * @return the 10 characters following the first {@code s="} in the inline
	 *         script that references the {@code publish_time} element, or
	 *         {@code null} if no script provides them
	 * @throws IOException if the page cannot be downloaded
	 */
	public static String getTime(String informationUrl) throws IOException {
		return timeOf(fetch(informationUrl));
	}

	/**
	 * Gets the article title.
	 *
	 * @param informationUrl URL of the article page
	 * @return combined text of all elements with class {@code rich_media_title}
	 *         (jsoup yields an empty string when none match)
	 * @throws IOException if the page cannot be downloaded
	 */
	public static String getTitle(String informationUrl) throws IOException {
		return titleOf(fetch(informationUrl));
	}

	/**
	 * Gets the official account identifier.
	 *
	 * @param informationUrl URL of the article page
	 * @return text of the first element with class {@code profile_meta_value},
	 *         or {@code null} if there is none
	 * @throws IOException if the page cannot be downloaded
	 */
	public static String getOfficialAccount(String informationUrl) throws IOException {
		return officialAccountOf(fetch(informationUrl));
	}

	/**
	 * Gets the article body as plain text.
	 *
	 * @param informationUrl URL of the article page
	 * @return text of the element with id {@code js_content}, or {@code null} if absent
	 * @throws IOException if the page cannot be downloaded
	 */
	public static String getContent(String informationUrl) throws IOException {
		return contentOf(fetch(informationUrl));
	}

	/**
	 * Gets the article's real (canonical) link.
	 *
	 * <p>The method name keeps its original spelling so existing callers continue to compile.
	 *
	 * @param informationUrl URL of the article page
	 * @return {@code content} attribute of the first {@code og:url} meta tag,
	 *         or {@code null} if the tag is missing
	 * @throws IOException if the page cannot be downloaded
	 */
	public static String getTureUrl(String informationUrl) throws IOException {
		return metaContent(fetch(informationUrl), "meta[property=og:url]");
	}

	/**
	 * Gets the article author.
	 *
	 * @param informationUrl URL of the article page
	 * @return {@code content} attribute of the first {@code og:article:author}
	 *         meta tag, or {@code null} if the tag is missing
	 * @throws IOException if the page cannot be downloaded
	 */
	public static String getAuthor(String informationUrl) throws IOException {
		return authorOf(fetch(informationUrl));
	}

	/** Demo entry point: downloads one sample article once and prints several extracted fields. */
	public static void main(String[] args) throws IOException {
		String url="https://mp.weixin.qq.com/s/gcG6PnYwJUDabdYYe_snYw";
		// Fetch once and reuse the parsed document rather than downloading per field.
		Document doc = fetch(url);
		System.out.println(timeOf(doc));
		System.out.println(titleOf(doc));
		System.out.println(officialAccountOf(doc));
		System.out.println(contentOf(doc));
		System.out.println(authorOf(doc));
	}

	/** Downloads and parses the page at the given URL using the shared timeout. */
	private static Document fetch(String informationUrl) throws IOException {
		return Jsoup.connect(informationUrl).timeout(TIMEOUT_MILLIS).get();
	}

	/** Extracts the quoted value that follows the cover-URL key in the serialized page. */
	private static String coverUrlOf(Document doc) {
		String html = doc.toString();
		int keyIndex = html.indexOf(COVER_URL_KEY);
		if (keyIndex < 0) {
			return null;
		}
		int openQuote = html.indexOf('"', keyIndex);
		if (openQuote < 0) {
			return null;
		}
		int closeQuote = html.indexOf('"', openQuote + 1);
		if (closeQuote < 0) {
			return null;
		}
		// substring's end index is exclusive, so passing the closing quote's
		// position keeps the value's final character.
		return html.substring(openQuote + 1, closeQuote);
	}

	/** Finds the publish time in the inline script that references the publish_time element. */
	private static String timeOf(Document doc) {
		for (Element script : doc.select("script")) {
			String code = script.html();
			if (!code.contains(PUBLISH_TIME_HINT)) {
				continue;
			}
			int markerIndex = code.indexOf(PUBLISH_TIME_MARKER);
			if (markerIndex < 0) {
				continue;
			}
			int start = markerIndex + PUBLISH_TIME_MARKER.length();
			int end = start + PUBLISH_TIME_LENGTH;
			// Skip scripts too short to hold the full value instead of throwing.
			if (end <= code.length()) {
				return code.substring(start, end);
			}
		}
		return null;
	}

	/** Returns the combined text of all title elements. */
	private static String titleOf(Document doc) {
		return doc.getElementsByClass("rich_media_title").text();
	}

	/** Returns the text of the first profile meta value, or null if there is none. */
	private static String officialAccountOf(Document doc) {
		Elements metaValues = doc.getElementsByClass("profile_meta_value");
		return metaValues.isEmpty() ? null : metaValues.get(0).text();
	}

	/** Returns the plain text of the article body element, or null if absent. */
	private static String contentOf(Document doc) {
		// Use Element.html() on js_content instead if the HTML markup is wanted.
		return textOrNull(doc.getElementById("js_content"));
	}

	/** Returns the author from the og:article:author meta tag, or null if absent. */
	private static String authorOf(Document doc) {
		return metaContent(doc, "meta[property=og:article:author]");
	}

	/** Returns the content attribute of the first element matching the selector, or null if none match. */
	private static String metaContent(Document doc, String selector) {
		Elements metas = doc.select(selector);
		return metas.isEmpty() ? null : metas.get(0).attr("content");
	}

	/** Null-safe wrapper around Element.text(). */
	private static String textOrNull(Element element) {
		return element == null ? null : element.text();
	}
}
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值