要爬取的网页是 http://mm.10086.cn/android/info/300008730468.html?from=www&fw=227062
打开该网页，按 F12 进入浏览器的开发者调试模式
找出你想要爬取的数据
代码
package com.baidu;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.HasChildFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import com.baidu.apply.bean.Apply;
import com.baidu.util.WebUtils;
import com.baidu.util.httpDownload;
public class HtmlParse {
public static void main(String[] args) throws ParserException {
Parser parse=new Parser("http://mm.10086.cn/android/info/300008730468.html?from=www&fw=227062");
parse.setEncoding("UTF-8");
//获取应用的名称
Apply apply=new Apply();
String appName="";
TagNameFilter nameFilter = new TagNameFilter("div");
AndFilter andFilter = new AndFilter(nameFilter, new HasAttributeFilter("class","mj_big_title font-f-yh"));
NodeList list= parse.parse(andFilter);
Tag tag=(Tag) list.elementAt(0);
System.out.println(tag.toPlainTextString());
//清零
parse.reset();