本文介绍如何把雅虎心香一脉(http://soul.cn.yahoo.com/)每天更新的4篇文章全文发送到指定邮箱:没有广告,没有其他内容,只有文章本身。用到的库:JavaMail、jsoup、HttpClient。我的本意其实只是想要这4篇文章的feed,可是搜了一下,雅虎并没有提供,只好自己想办法解决。这里用的是比较笨的方法,另外还有一种更简单的制作feed的方法,会在下篇博文中介绍。
上面3个库分别是用来干什么的呢?JavaMail用来发送邮件,jsoup用来解析HTML,HttpClient用来发送HTTP请求。想进一步了解的话请自行Google。
项目结构:
XinXiangYiMai
--lib
--src
----com.xx.yahoo
----com.xx.util
项目代码
package com.xx.yahoo;
/**
* Created with IntelliJ IDEA.
* User: xx
* Date: 13-3-27
* Time: 上午11:39
* DoWhat:文章
*/
public class Article {
private String title;
private String author;
private String createTime;
private String content;
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public String getCreateTime() {
return createTime;
}
public void setCreateTime(String createTime) {
this.createTime = createTime;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
}
package com.xx.yahoo;
import com.xx.util.MyTimeUtil;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Downloads pages over HTTP and extracts article links and article content from them.
 *
 * <p>All members are static; the class keeps no state between calls.
 */
public class ResourceHtml {

    /** Selector for the article headline element. */
    private static final String TITLE_SELECTOR = "div.title > h2";

    /** Selector for the article author element. */
    private static final String AUTHOR_SELECTOR = "div.title > p";

    /** Selector for the element that wraps the article body. */
    private static final String BODY_SELECTOR = "div.article_p";

    /** Characters replaced in a title before it is used as a file name. */
    private static final String UNSAFE_FILE_NAME_CHARS = "[\\\\/:*?\"<>|]";

    /**
     * Fetches the page source for a URL.
     *
     * @param url address of the page to download
     * @return the response body, or {@code null} when the status is not 200, the response
     *         carries no body, or an {@link IOException} occurs (the exception is printed)
     */
    public static String getHtmlByUrl(String url) {
        String html = null;
        HttpClient httpClient = new DefaultHttpClient();
        HttpGet httpGet = new HttpGet(url);
        try {
            HttpResponse httpResponse = httpClient.execute(httpGet);
            int state = httpResponse.getStatusLine().getStatusCode();
            if (state == HttpStatus.SC_OK) {
                HttpEntity entity = httpResponse.getEntity();
                // A response may legitimately have no entity; avoid passing null to EntityUtils.
                if (entity != null) {
                    // NOTE(review): no charset is passed, so decoding relies on what the
                    // response declares - confirm that the target pages decode correctly.
                    html = EntityUtils.toString(entity);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The client is created per call, so release its connections on every path.
            httpClient.getConnectionManager().shutdown();
        }
        return html;
    }

    /**
     * Collects the {@code href} values of all elements matching one selector.
     *
     * @param html page source to search; {@code null} yields an empty list
     * @param css  CSS selector of the link elements
     * @return the {@code href} attribute values in document order
     */
    public static List<String> getArticleLinks(String html, String css) {
        return getArticleLinks(html, css, null);
    }

    /**
     * Collects the {@code href} values of all elements matching one or two selectors.
     *
     * <p>Values are returned exactly as written in the attribute; they are not resolved
     * against a base URL.
     *
     * @param html page source to search; {@code null} yields an empty list
     * @param css1 first CSS selector; its matches come first in the result
     * @param css2 optional second CSS selector; skipped when {@code null} or empty
     * @return the {@code href} attribute values, first selector's matches then the second's
     */
    public static List<String> getArticleLinks(String html, String css1, String css2) {
        List<String> list = new ArrayList<String>();
        if (html == null) {
            // getHtmlByUrl reports failure as null; treat that as "no links" instead of
            // letting the parser throw.
            return list;
        }
        Document doc = getDocument(html);
        collectHrefs(doc, css1, list);
        if (css2 != null && !css2.isEmpty()) {
            collectHrefs(doc, css2, list);
        }
        return list;
    }

    /**
     * Downloads an article and returns it with all markup stripped from the body.
     *
     * @param url address of the article page
     * @return the parsed article; its creation time is today's date as {@code yyyyMMdd}
     * @throws IllegalStateException if the page cannot be fetched or lacks an expected element
     */
    public static Article doArticle(String url) {
        return buildArticle(url, true);
    }

    /**
     * Downloads an article and returns it with the body's HTML markup kept.
     *
     * @param url address of the article page
     * @return the parsed article; its creation time is today's date as {@code yyyyMMdd}
     * @throws IllegalStateException if the page cannot be fetched or lacks an expected element
     */
    public static Article doHtmlArticle(String url) {
        return buildArticle(url, false);
    }

    /**
     * Writes an article to {@code <path>/<createTime>/<title>}.
     *
     * <p>The file holds the author, the creation time and the content, in that order.
     * Missing directories are created. Failures are reported on standard output and do not
     * propagate. The file is written with the platform default charset.
     *
     * @param article article to store
     * @param path    root directory for stored articles
     */
    public static void doTXT(Article article, String path) {
        File txt = new File(path);
        System.out.println(txt.getAbsolutePath());

        File timeFile = new File(txt.getAbsoluteFile(), String.valueOf(article.getCreateTime()));
        // mkdirs also creates missing parents; mkdir would silently fail on a nested path.
        if (!timeFile.isDirectory() && !timeFile.mkdirs()) {
            System.out.println("Could not create directory: " + timeFile.getAbsolutePath());
            return;
        }

        // Titles come from scraped pages and may contain characters that are path
        // separators or are not allowed in file names.
        String fileName = String.valueOf(article.getTitle()).replaceAll(UNSAFE_FILE_NAME_CHARS, "_");
        File articleFile = new File(timeFile, fileName);
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(articleFile));
            try {
                bw.write(article.getAuthor());
                bw.newLine();
                bw.write(article.getCreateTime());
                bw.newLine();
                bw.write(article.getContent());
            } finally {
                // close() flushes, and runs even when a write fails.
                bw.close();
            }
        } catch (IOException e) {
            System.out.println("创建文件失败");
            e.printStackTrace();
        }
    }

    /** Shared implementation of {@link #doArticle} and {@link #doHtmlArticle}. */
    private static Article buildArticle(String url, boolean stripTags) {
        String html = getHtmlByUrl(url);
        if (html == null) {
            throw new IllegalStateException("Could not fetch article page: " + url);
        }
        Document doc = getDocument(html);
        Element title = requireFirst(doc, TITLE_SELECTOR, url);
        Element author = requireFirst(doc, AUTHOR_SELECTOR, url);
        Element div = requireFirst(doc, BODY_SELECTOR, url);

        String body = div.html();
        if (stripTags) {
            body = body.replaceAll("<[^>]*>", "");
        }

        Article article = new Article();
        article.setTitle(title.text());
        article.setAuthor(author.text());
        article.setCreateTime(MyTimeUtil.getNowStr("yyyyMMdd"));
        article.setContent(body);
        return article;
    }

    /** Returns the first element matching {@code css}, or fails with a descriptive message. */
    private static Element requireFirst(Document doc, String css, String url) {
        Element found = doc.select(css).first();
        if (found == null) {
            throw new IllegalStateException("No element matches '" + css + "' in page " + url);
        }
        return found;
    }

    /** Appends the {@code href} of every element matching {@code css} to {@code target}. */
    private static void collectHrefs(Document doc, String css, List<String> target) {
        for (Element e : doc.select(css)) {
            target.add(e.attr("href"));
        }
    }

    /**
     * Parses page source into a jsoup document.
     *
     * @param html page source
     * @return the parsed document
     */
    private static Document getDocument(String html) {
        return Jsoup.parse(html);
    }
}
主程序
package com.xx.yahoo;
import com.xx.util.MyJMailUtil;
import java.util.List;
/**
 * Entry point: reads the index page, follows every article link found there and mails
 * each article.
 */
public class MyPaiRunner {

    /**
     * Fetches the index page, extracts the article links and sends one mail per article.
     *
     * <p>If the index page cannot be fetched the program reports it and exits with status 1.
     * An article that fails to download or parse is reported and skipped so that the
     * remaining articles are still sent.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://soul.cn.yahoo.com/";
        String css1 = "div.i_right > ul > li > a";
        String css2 = "div.i_right > ul > a";

        String html = ResourceHtml.getHtmlByUrl(url);
        if (html == null) {
            // getHtmlByUrl returns null on a non-200 status or an I/O error.
            System.err.println("Could not fetch index page: " + url);
            System.exit(1);
        }

        List<String> links = ResourceHtml.getArticleLinks(html, css1, css2);
        for (String link : links) {
            System.out.println(link);
            try {
                Article article = ResourceHtml.doHtmlArticle(link);
                System.out.println(article.getTitle());
                MyJMailUtil.sendMail(article);
            } catch (RuntimeException e) {
                // Top-level boundary: one broken article must not stop the others.
                System.err.println("Skipping article " + link + ": " + e);
            }
        }
    }
}
util工具包
package com.xx.util;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Helpers that render a point in time as text using a caller-supplied pattern.
 */
public class MyTimeUtil {

    /**
     * Formats the current time.
     *
     * @param pattern a {@link SimpleDateFormat} pattern
     * @return the current time rendered with {@code pattern}
     */
    public static String getNowStr(String pattern) {
        return getNowStr(pattern, null);
    }

    /**
     * Formats the given time, or the current time when none is given.
     *
     * @param pattern a {@link SimpleDateFormat} pattern
     * @param date    the instant to format; {@code null} means "now"
     * @return {@code date} (or the current time) rendered with {@code pattern}
     */
    public static String getNowStr(String pattern, Date date) {
        SimpleDateFormat formatter = new SimpleDateFormat(pattern);
        Date target = (date != null) ? date : new Date();
        return formatter.format(target);
    }
}
package com.xx.util;
import com.xx.yahoo.Article;
import javax.mail.*;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import java.util.Properties;
/**
 * Sends a fetched article to a configured mailbox over SMTP.
 *
 * <p>The {@code "xx"} values below are placeholders for the real account settings.
 */
public class MyJMailUtil {

    /** SMTP server (163 mail). */
    private static final String HOST = "smtp.163.com";

    /** Address that receives the mail. */
    private static final String TO = "xx";

    /** Address the mail is sent from. */
    private static final String FROM = "xx";

    /** User name of the sending account. */
    private static final String USERNAME = "xx";

    /** Password of the sending account. */
    private static final String PASSWORD = "xx";

    /**
     * Mails one article as an HTML message.
     *
     * <p>The subject is the article title. The body is the author, the creation time and
     * the content, each on its own line. A {@link MessagingException} is printed and not
     * propagated.
     *
     * @param article article to send
     */
    public static void sendMail(Article article) {
        Properties properties = System.getProperties();
        properties.setProperty("mail.smtp.host", HOST);
        // Must be the String "true": Properties.getProperty returns null for a non-String
        // value such as a Boolean, so a lookup through it would not see the setting.
        properties.setProperty("mail.smtp.auth", "true");

        Session session = Session.getDefaultInstance(properties, new Authenticator() {
            @Override
            protected PasswordAuthentication getPasswordAuthentication() {
                return new PasswordAuthentication(USERNAME, PASSWORD);
            }
        });
        // Call session.setDebug(true) here to print the SMTP conversation to the console.

        try {
            MimeMessage message = new MimeMessage(session);
            message.setFrom(new InternetAddress(FROM));
            message.addRecipient(Message.RecipientType.TO, new InternetAddress(TO));
            // Explicit charset so non-ASCII titles do not depend on the platform default.
            message.setSubject(article.getTitle(), "UTF-8");

            // The body is sent as text/html, where "\n" is collapsed whitespace, so the
            // fields are separated with <br/>. The author is scraped plain text and is
            // escaped; the content is left as is because it is meant to be an HTML fragment.
            String body = escapeHtml(article.getAuthor())
                    + "<br/>" + article.getCreateTime()
                    + "<br/>" + article.getContent();
            message.setContent(body, "text/html;charset=utf-8");

            Transport.send(message);
        } catch (MessagingException e) {
            e.printStackTrace();
        }
    }

    /** Escapes the characters that are significant in HTML text; {@code null} becomes "". */
    private static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        // '&' first, otherwise the entities produced below would be escaped again.
        return text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace("\"", "&quot;");
    }
}
内容都在代码中了,有不清楚的地方可以留言。我也还处在练习阶段,呵呵。