package
atest;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters. * ;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util. * ;
import com.jdon.controller.events.EventModel;
import cn.vetech.framework.base.dao.B_class_dao;
import cn.vetech.framework.base.dao.B_class_dao_sql;
import cn.vetech.framework.dao.SqlMapDaoTemplateFactory;
import cn.vetech.framework.news.dao.B_news_dao;
import cn.vetech.framework.news.dao.B_news_dao_sql;
import cn.vetech.framework.news.model.B_news;
import cn.vetech.framework.news.service.B_news_service;
import cn.vetech.framework.news.service.B_news_service_imp;
import cn.vetech.framework.util.VeDate;
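/**
 * Scraping rules for China Training Net (china-training.com):
 * 1. Scrape the files under the chinahtml/zixunzhongxin directory.
 * Description: iterate over every link found under a URL, filter out some of the
 * links according to a set of rules, and read the content under a specific node
 * of each page.
 *
 * @author sam.zhang
 */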
public class TestParser ... {
// DAO for news records; assigned in the constructor.
private B_news_dao b_news_dao;
// DAO for class records; assigned in the constructor.
private B_class_dao b_class_dao;
// News service built on top of the two DAOs above; assigned in the constructor.
private B_news_service b_news_service;
private String TRAINING_URL = "http://www.china-training.com"; // base URL of the China Training Net site
private static String TRAINING_ZXURL = "http://www.china-training.com/newpage/zxzx.asp"; // root URL of the China Training Net news/information section
private String FILEPATH = "chinahtml"; // name of the root folder where China Training Net places its news
private String ZXZX = "zixunzhongxin"; // directory that holds the information-center pages
/**
 * Creates the parser and wires up its persistence collaborators.
 *
 * One SqlMapDaoTemplateFactory instance is shared by the news DAO and the
 * class DAO, and both DAOs are then handed to the news service.
 */
public TestParser() {
    SqlMapDaoTemplateFactory sqldao = new SqlMapDaoTemplateFactory();
    b_news_dao = new B_news_dao_sql(sqldao);
    b_class_dao = new B_class_dao_sql(sqldao);
    // NOTE(review): the last two service dependencies are passed as null; what they
    // are is not visible here -- confirm the service tolerates null for them.
    b_news_service = new B_news_service_imp(b_news_dao, b_class_dao, null, null);
}
/**
 * Gets all links found under the specified URL.
 *
 * @param url the URL whose links are collected
 */
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters. * ;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util. * ;
import com.jdon.controller.events.EventModel;
import cn.vetech.framework.base.dao.B_class_dao;
import cn.vetech.framework.base.dao.B_class_dao_sql;
import cn.vetech.framework.dao.SqlMapDaoTemplateFactory;
import cn.vetech.framework.news.dao.B_news_dao;
import cn.vetech.framework.news.dao.B_news_dao_sql;
import cn.vetech.framework.news.model.B_news;
import cn.vetech.framework.news.service.B_news_service;
import cn.vetech.framework.news.service.B_news_service_imp;
import cn.vetech.framework.util.VeDate;
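/**
 * Scraping rules for China Training Net (china-training.com):
 * 1. Scrape the files under the chinahtml/zixunzhongxin directory.
 * Description: iterate over every link found under a URL, filter out some of the
 * links according to a set of rules, and read the content under a specific node
 * of each page.
 *
 * @author sam.zhang
 */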
public class TestParser ... {
// DAO for news records; assigned in the constructor.
private B_news_dao b_news_dao;
// DAO for class records; assigned in the constructor.
private B_class_dao b_class_dao;
// News service built on top of the two DAOs above; assigned in the constructor.
private B_news_service b_news_service;
private String TRAINING_URL = "http://www.china-training.com"; // base URL of the China Training Net site
private static String TRAINING_ZXURL = "http://www.china-training.com/newpage/zxzx.asp"; // root URL of the China Training Net news/information section
private String FILEPATH = "chinahtml"; // name of the root folder where China Training Net places its news
private String ZXZX = "zixunzhongxin"; // directory that holds the information-center pages
/**
 * Creates the parser and wires up its persistence collaborators.
 *
 * One SqlMapDaoTemplateFactory instance is shared by the news DAO and the
 * class DAO, and both DAOs are then handed to the news service.
 */
public TestParser() {
    SqlMapDaoTemplateFactory sqldao = new SqlMapDaoTemplateFactory();
    b_news_dao = new B_news_dao_sql(sqldao);
    b_class_dao = new B_class_dao_sql(sqldao);
    // NOTE(review): the last two service dependencies are passed as null; what they
    // are is not visible here -- confirm the service tolerates null for them.
    b_news_service = new B_news_service_imp(b_news_dao, b_class_dao, null, null);
}
/**
 * Gets all links found under the specified URL.
 *
 * @param url the URL whose links are collected
 */