将雅虎《心香一脉》每天推荐的文章发到咱邮箱(Java版)

    本文介绍的就是把雅虎心香一脉(http://soul.cn.yahoo.com/)每天的那4篇文章全文发到指定邮箱,没有广告,没有其他内容,就文章本身。用到的东西:jmail、jsoup、httpclient。我的本意其实就是想要个那4篇文章的feed,可是搜了一下雅虎没有提供,只好自己想办法解决了。这个感觉是比较笨的方法,还有个简单的方法来制作feed,这个在下篇博文中会介绍。

    上面3个东西分别是用来干什么的呢?jmail(即JavaMail,对应代码中的javax.mail包)用来发送邮件,jsoup用来解析html,httpclient用来发送HTTP请求。想更进一步了解的请自行google。

    项目结构:
          XinXiangYiMai
                    --lib
                    --src
                        --com.xx.yahoo
                        --com.xx.util

    项目代码
package com.xx.yahoo;

/**
 * Created with IntelliJ IDEA.
 * User: xx
 * Date: 13-3-27
 * Time: 上午11:39
 * DoWhat:文章
 */
public class Article {

    private String title;

    private String author;

    private String createTime;

    private String content;

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getCreateTime() {
        return createTime;
    }

    public void setCreateTime(String createTime) {
        this.createTime = createTime;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }
}


package com.xx.yahoo;

import com.xx.util.MyTimeUtil;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Created with IntelliJ IDEA.
 * User: xx
 * Date: 13-3-26
 * Time: 5:55 PM
 * DoWhat: downloads a page over HTTP, extracts link targets and article
 * fields from it with jsoup, and can write an article to a text file.
 */

public class ResourceHtml {

    /** Charset used to decode a response body when the response itself names none. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /**
     * Downloads the page at the given URL and returns its source.
     *
     * @param url the address to request with HTTP GET
     * @return the response body, or {@code null} when the status is not 200,
     *         the response carries no entity, or an I/O error occurs (the
     *         error is printed, not rethrown)
     */
    public static String getHtmlByUrl(String url) {
        String html = null;
        HttpClient httpClient = new DefaultHttpClient();
        HttpGet httpGet = new HttpGet(url);

        try {
            HttpResponse httpResponse = httpClient.execute(httpGet);
            int state = httpResponse.getStatusLine().getStatusCode();
            if (state == HttpStatus.SC_OK) {
                HttpEntity entity = httpResponse.getEntity();
                if (entity != null) {
                    // Pass a fallback charset: without one, a response that does not
                    // declare its charset is not decoded as UTF-8 and Chinese text
                    // comes out garbled.
                    html = EntityUtils.toString(entity, DEFAULT_CHARSET);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            httpClient.getConnectionManager().shutdown();
        }

        return html;
    }

    /**
     * Collects the {@code href} values of all elements matching one selector.
     *
     * @param html page source; {@code null} yields an empty list
     * @param css  jsoup CSS selector for the link elements
     * @return the non-empty {@code href} values in document order, never {@code null}
     */
    public static List<String> getArticleLinks(String html, String css) {
        return getArticleLinks(html, css, null);
    }


    /**
     * Collects the {@code href} values of all elements matching either selector.
     * Matches of {@code css1} come first, followed by matches of {@code css2}.
     *
     * @param html page source; {@code null} (e.g. a failed {@link #getHtmlByUrl}) yields an empty list
     * @param css1 first jsoup CSS selector
     * @param css2 optional second selector; ignored when {@code null} or empty
     * @return the non-empty {@code href} values, never {@code null}
     */
    public static List<String> getArticleLinks(String html, String css1, String css2) {
        List<String> list = new ArrayList<String>();
        if (html == null) {
            // Nothing was downloaded, so there is nothing to parse.
            return list;
        }
        Document doc = getDocument(html);

        collectHrefs(doc, css1, list);
        if (css2 != null && !css2.isEmpty()) {
            collectHrefs(doc, css2, list);
        }

        return list;
    }

    /**
     * Downloads one article and returns it with all tags stripped from the content.
     *
     * @param url address of the article page
     * @return the article; title, author and content are empty strings when the
     *         corresponding element is missing from the page
     * @throws IllegalArgumentException if no page source could be fetched from {@code url}
     */
    public static Article doArticle(String url) {
        return loadArticle(url, false);
    }

    /**
     * Downloads one article and returns it with the content's HTML markup kept.
     *
     * @param url address of the article page
     * @return the article; title, author and content are empty strings when the
     *         corresponding element is missing from the page
     * @throws IllegalArgumentException if no page source could be fetched from {@code url}
     */
    public static Article doHtmlArticle(String url) {
        return loadArticle(url, true);
    }

    /**
     * Writes an article to {@code <path>/<createTime>/<title>}: author on the
     * first line, creation time on the second, then the content. Missing
     * directories are created. I/O failures are printed, not rethrown.
     *
     * @param article the article to write
     * @param path    base output directory
     */
    public static void doTXT(Article article, String path) {
        File txt = new File(path);
        System.out.println(txt.getAbsolutePath());

        File timeFile = new File(txt.getAbsolutePath() + "/" + article.getCreateTime());
        // mkdirs (not mkdir) so that a nested or not-yet-existing base path works too.
        if (!timeFile.exists() && !timeFile.mkdirs()) {
            System.out.println("创建文件失败");
            return;
        }

        // A title may contain characters that are path separators or are not
        // allowed in file names; replace them so the file lands in timeFile.
        String fileName = String.valueOf(article.getTitle()).replaceAll("[\\\\/:*?\"<>|]", "_");
        File articleFile = new File(timeFile.getAbsolutePath() + "/" + fileName);

        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(articleFile));
            bw.write(article.getAuthor());
            bw.newLine();
            bw.write(article.getCreateTime());
            bw.newLine();
            bw.write(article.getContent());
            bw.flush();
        } catch (IOException e) {
            System.out.println("创建文件失败");
            e.printStackTrace();
        } finally {
            // Always release the file handle, even when a write failed.
            if (bw != null) {
                try {
                    bw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

    }

    /**
     * Parses page source into a jsoup document.
     *
     * @param html page source
     * @return the parsed document
     */
    private static Document getDocument(String html) {
        return Jsoup.parse(html);
    }

    /** Appends the non-empty {@code href} of every element matching {@code css} to {@code sink}. */
    private static void collectHrefs(Document doc, String css, List<String> sink) {
        for (Element e : doc.select(css)) {
            String href = e.attr("href");
            // attr() gives "" when the attribute is absent; such an entry is not a usable link.
            if (!href.isEmpty()) {
                sink.add(href);
            }
        }
    }

    /** Fetches and parses one article page; strips tags from the content unless {@code keepMarkup}. */
    private static Article loadArticle(String url, boolean keepMarkup) {
        String html = getHtmlByUrl(url);
        if (html == null) {
            throw new IllegalArgumentException("No HTML could be fetched from " + url);
        }
        Document doc = getDocument(html);

        Element body = doc.select("div.article_p").first();
        String content = (body == null) ? "" : body.html();
        if (!keepMarkup) {
            content = content.replaceAll("<[^>]*>", "");
        }

        Article article = new Article();
        article.setTitle(textOf(doc.select("div.title > h2").first()));
        article.setAuthor(textOf(doc.select("div.title > p").first()));
        article.setCreateTime(MyTimeUtil.getNowStr("yyyyMMdd"));
        article.setContent(content);
        return article;
    }

    /** Returns the element's text, or an empty string when the element was not found. */
    private static String textOf(Element element) {
        return (element == null) ? "" : element.text();
    }


}

主程序
package com.xx.yahoo;

import com.xx.util.MyJMailUtil;

import java.util.List;

/**
 * Created with IntelliJ IDEA.
 * User: xx
 * Date: 13-3-26
 * Time: 6:16 PM
 * DoWhat: entry point. Downloads the front page, extracts the article links,
 * then downloads each article (HTML kept) and mails it.
 */
public class MyPaiRunner {

    /**
     * Runs one fetch-and-mail pass.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        String url = "http://soul.cn.yahoo.com/";
        String css1 = "div.i_right > ul > li > a";
        String css2 = "div.i_right > ul > a";

        String html = ResourceHtml.getHtmlByUrl(url);
        if (html == null) {
            // getHtmlByUrl signals a failed download with null; there is nothing to parse.
            System.err.println("Could not download " + url);
            return;
        }

        List<String> links = ResourceHtml.getArticleLinks(html, css1, css2);
        for (String link : links) {
            System.out.println(link);
            try {
                Article article = ResourceHtml.doHtmlArticle(link);
                System.out.println(article.getTitle());
                MyJMailUtil.sendMail(article);
            } catch (RuntimeException e) {
                // One unreadable article must not stop the remaining ones from being mailed.
                System.err.println("Skipping " + link);
                e.printStackTrace();
            }
        }


    }


}

util工具包

package com.xx.util;

import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Created with IntelliJ IDEA.
 * User: xx
 * Date: 13-3-27
 * Time: 11:32 AM
 * DoWhat: formats a point in time as a string using a caller-supplied pattern.
 */
public class MyTimeUtil {

    /**
     * Formats the current time.
     *
     * @param pattern a {@link SimpleDateFormat} pattern
     * @return the current time rendered with {@code pattern}
     */
    public static String getNowStr(String pattern) {
        return getNowStr(pattern, null);
    }

    /**
     * Formats the given time, or the current time when none is given.
     *
     * @param pattern a {@link SimpleDateFormat} pattern
     * @param date    the time to format; {@code null} means "now"
     * @return {@code date} (or the current time) rendered with {@code pattern}
     */
    public static String getNowStr(String pattern, Date date) {
        SimpleDateFormat formatter = new SimpleDateFormat(pattern);
        Date moment = (date != null) ? date : new Date();
        return formatter.format(moment);
    }

}

package com.xx.util;

import com.xx.yahoo.Article;

import javax.mail.*;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import java.util.Properties;

/**
 * Created with IntelliJ IDEA.
 * User: xx
 * Date: 13-3-28
 * Time: 2:58 PM
 * DoWhat: sends a fetched article to the configured mailbox over SMTP.
 */

public class MyJMailUtil {

    // The "xx" values are placeholders to be filled in before running.
    private static final String host = "smtp.163.com"; // SMTP server of 163 mail
    private static final String to = "xx";             // mailbox that receives the mail
    private static final String from = "xx";           // mailbox that sends the mail
    private static final String username = "xx";       // user name of the sending mailbox
    private static final String password = "xx";       // password of the sending mailbox



    /**
     * Sends one article as an HTML mail: the title becomes the subject, and
     * the body is author, creation time and content on separate lines.
     * A {@link MessagingException} is printed, not rethrown.
     *
     * @param article the article to send
     */
    public static void sendMail(Article article){

        // Work on a copy so that the JVM-wide system properties are not modified,
        // while settings passed with -D are still honoured.
        Properties properties = new Properties();
        properties.putAll(System.getProperties());
        // SMTP server settings. Values are strings: a Boolean value is not
        // returned by Properties.getProperty and may be ignored.
        properties.put("mail.smtp.host", host);
        properties.put("mail.smtp.auth", "true");

        // Create a private mail session (getInstance) rather than the shared,
        // cached default one, so these settings always apply.
        Session session = Session.getInstance(properties, new Authenticator() {
            @Override
            protected PasswordAuthentication getPasswordAuthentication() { // account credentials
                return new PasswordAuthentication(username, password);
            }
        });

        // To print the SMTP conversation on the console, call session.setDebug(true) here.

        try {
            // Build the message.
            MimeMessage message = new MimeMessage(session);
            message.setFrom(new InternetAddress(from));
            message.addRecipient(Message.RecipientType.TO, new InternetAddress(to));
            // Encode the subject explicitly so non-ASCII titles do not depend on the platform charset.
            message.setSubject(article.getTitle(), "utf-8");
            // The body is HTML, where "\n" does not break a line; use <br/> between the parts.
            message.setContent(
                    article.getAuthor() + "<br/>" + article.getCreateTime() + "<br/>" + article.getContent(),
                    "text/html;charset=utf-8");

            // Send.
            Transport.send(message);
        } catch (MessagingException e) {
            e.printStackTrace();
        }

    }

}


    内容都在代码中了,有不清楚的,可以留言,我也是个练习级别的,呵呵

转载于:https://my.oschina.net/qqli/blog/123489

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值