一、简单梳理一下代码:
二、主要代码
/**
 * Service that downloads a single news page with jsoup and copies selected parts of it
 * into a {@link NewsInfo}. {@code add} and {@code delete} hand their argument to the DAO
 * returned by {@code getParseNewsInfoDAO()}.
 */
public class ParseNewsInfoServiceImpl extends BaseServiceImpl implements
        ParseNewsInfoService {

    /** Timeout, in milliseconds, passed to jsoup when fetching a news page. */
    private static final int FETCH_TIMEOUT_MILLIS = 10000;

    /**
     * Passes the given news item to {@code insert} on the DAO returned by
     * {@code getParseNewsInfoDAO()}.
     *
     * @param t the news item to insert
     */
    public void add(NewsInfo t) {
        this.getParseNewsInfoDAO().insert(t);
    }

    /**
     * Passes the given news item to {@code delete} on the DAO returned by
     * {@code getParseNewsInfoDAO()}.
     *
     * @param t the news item to delete
     */
    public void delete(NewsInfo t) {
        this.getParseNewsInfoDAO().delete(t);
    }

    /**
     * Fetches the page at {@code url} and extracts title, post date, view count, first
     * image URL and body text into a new {@link NewsInfo}.
     *
     * <p>Parts of the page that cannot be found are stored as the empty string instead of
     * aborting the whole parse.
     *
     * @param url absolute URL of the news page
     * @return a newly populated {@link NewsInfo}
     * @throws IllegalStateException if the page cannot be downloaded; the underlying
     *         {@link IOException} is attached as the cause
     */
    public NewsInfo ParseNews(String url) {
        Document doc;
        try {
            doc = Jsoup.connect(url).timeout(FETCH_TIMEOUT_MILLIS).get();
        } catch (IOException e) {
            // Previously the exception was only printed and the null document was
            // dereferenced right afterwards; fail with the real cause instead.
            throw new IllegalStateException("Failed to fetch news page: " + url, e);
        }

        NewsInfo newsInfo = new NewsInfo();
        newsInfo.setTitle(firstOwnText(doc, "contentTitle"));
        newsInfo.setTitleDate(firstOwnText(doc, "postDate"));
        newsInfo.setReadNum(firstOwnText(doc, "postviews"));

        // NOTE(review): this is the first <img> of the whole page, not necessarily one
        // inside the article body - confirm that this is the intended image.
        Element firstImage = doc.select("img").first();
        newsInfo.setImgURL(firstImage == null ? "" : firstImage.absUrl("src"));

        newsInfo.setText(doc.select("#NewsPostDetailContent").text());
        return newsInfo;
    }

    /**
     * Returns the own text of the first element carrying the given CSS class, or the
     * empty string when the document has no such element.
     */
    private static String firstOwnText(Document doc, String className) {
        for (Element match : doc.getElementsByClass(className)) {
            return match.ownText();
        }
        return "";
    }
}
public class ParseNewsListServiceImpl extends BaseServiceImpl implements
ParseNewsListService {
/**
 * Passes the given news-list entry to {@code insert} on the DAO returned by
 * {@code getParseNewsListDAO()}.
 *
 * @param t the news-list entry to insert
 */
public void add(NewsListBean t) {
    getParseNewsListDAO().insert(t);
}
/**
 * Passes the given news-list entry to {@code delete} on the DAO returned by
 * {@code getParseNewsListDAO()}.
 *
 * @param t the news-list entry to delete
 */
public void delete(NewsListBean t) {
    getParseNewsListDAO().delete(t);
}
/**
 * Parses the news headlines listed on the given page of the news index.
 *
 * <p>Downloads the page whose address is built by {@code getURL(pageNum)} and creates one
 * {@link NewsListBean} (title, link, date) for every element with the CSS class
 * {@code newsSummarytitle}.
 *
 * @param pageNum page number of the news index to parse
 * @return the parsed entries in document order; empty when the page has no matching
 *         elements
 * @throws IllegalStateException if the page cannot be downloaded; the underlying
 *         {@link IOException} is attached as the cause
 */
public List<NewsListBean> ParseNewsList(int pageNum) {
    String url = getURL(pageNum);
    // No cookies are sent; the (empty) map is kept so callers' requests stay unchanged.
    Map<String, String> cookies = new HashMap<String, String>();

    Document document;
    try {
        document = Jsoup.connect(url).cookies(cookies).get();
    } catch (IOException e) {
        // Previously the exception was only printed and the null document was
        // dereferenced in the loop below; fail with the real cause instead.
        throw new IllegalStateException(
                "Failed to fetch news list page " + pageNum + ": " + url, e);
    }

    List<NewsListBean> list = new ArrayList<NewsListBean>();
    for (Element e : document.getElementsByClass("newsSummarytitle")) {
        NewsListBean newsList = new NewsListBean();
        newsList.setTitle(e.select("a").attr("title"));
        newsList.setLink(e.select("a").attr("href"));
        newsList.setTitleDate(CharArrToString.charArrToString(e.select("span").text()));
        list.add(newsList);
    }
    return list;
}
/**
 * Builds the address of one page of the news index.
 *
 * @param pageNum page number inserted as the {@code m0d722pagenumber} query parameter
 * @return the complete URL for that page
 */
public static String getURL(int pageNum) {
    final String beforePageNumber = "http://www.cs.swust.edu.cn/index.php?mact=News,m0d722,default,1&m0d722number=25&m0d722category=%E6%96%B0%E9%97%BB%E5%8A%A8%E6%80%81&m0d722summarytemplate=newsPageList&m0d722pagenumber=";
    final String afterPageNumber = "&m0d722returnid=36&page=36";
    StringBuilder url = new StringBuilder(beforePageNumber);
    url.append(pageNum);
    url.append(afterPageNumber);
    return url.toString();
}
三、单元测试
@org.junit.Test
public void testParseNewsInfo(){
ParseNewsListService parseNewsListService = ctx.getBean(
"ParseNewsListService", ParseNewsListService.class);
ParseNewsInfoService parseNewsInfoService = ctx.getBean(
"ParseNewsInfoService", ParseNewsInfoService.class);
List list = parseNewsListService.ParseNewsList(1);
NewsInfo newsInfo = null;
NewsListBean newsListBean = null;
for(int j=0;j
newsInfo = parseNewsInfoService.ParseNews(list.get(j).getLink());
newsListBean = list.get(j);
newsInfo.setNewsListBean(newsListBean);
parseNewsInfoService.add(newsInfo);
}
}
四、运行结果:
五、展望
当然,以后肯定要加上 Spring 的任务调度(定时抓取),以保证数据库中数据的时效性!