HTML 链接解析:在线解析 HTML 页面,获取所需的 URL

/**
 * Crawls board listing pages of www.cc98.org and appends every {@code <a>} tag
 * that carries an {@code id} attribute to a local text file.
 *
 * <p>For each board id the pages are requested in increasing order. Paging for
 * a board stops as soon as {@link #write_file_url(String)} reports that the
 * page just fetched is empty or indistinguishable from the previously accepted
 * page (see {@link #compare_html(String, String)}).
 */
public class last {

    /**
     * URL of the last page that {@link #write_file_url(String)} accepted as
     * new content. It is the left-hand side of the next comparison.
     */
    public String oldurl = "http://www.cc98.org/list.asp?index.asp";

    /** URL most recently handed to {@link #write_file_url(String)}. */
    public String newurl = "http://www.cc98.org/list.asp?index.asp";

    /**
     * Walks board ids 3..623 and, within each board, pages 2..466, saving the
     * links of each page until the board runs out of new pages.
     *
     * @param args unused
     * @throws IOException     if the output file cannot be written
     * @throws ParserException if a page cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException, ParserException {
        last urls = new last();

        // NOTE(review): paging starts at 2, so page 1 of every board is never
        // requested - confirm this is intended.
        for (int boardid = 3; boardid <= 623; boardid++) {
            for (int page = 2; page <= 466; page++) {
                String url = "http://www.cc98.org/list.asp?boardid=" + boardid + "&page=" + page;

                // true means "nothing new on this page": move on to the next board.
                if (urls.write_file_url(url)) {
                    break;
                }
                System.out.println(url);
            }
        }

        System.out.println("end");
    }

    /**
     * Fetches {@code url}, echoes every {@code <a>} tag having an {@code id}
     * attribute to standard output, appends the same HTML (one tag per line)
     * to {@code F://htmls/compare/1.txt}, and then compares the page against
     * the previously accepted one.
     *
     * <p>The page is fetched and parsed before the output file is opened, and
     * the writers are managed by try-with-resources, so no file handle is left
     * open when fetching, parsing or writing fails.
     *
     * <p>Note that the links are written before the comparison is made, so the
     * links of a page that is subsequently judged a repeat are still appended.
     *
     * @param url address of the listing page to process
     * @return {@code true} if {@link #compare_html(String, String)} considers
     *         the page empty or equal to the previously accepted page (the
     *         caller should stop paging); {@code false} if the page is new, in
     *         which case {@link #oldurl} is advanced to {@code url}
     * @throws IOException     if the output file cannot be opened or written
     * @throws ParserException if the page cannot be fetched or parsed
     */
    public boolean write_file_url(String url) throws IOException, ParserException {
        ConnectionManager manager = Page.getConnectionManager();
        Parser parser = new Parser(manager.openConnection(url));
        parser.setEncoding("utf-8");

        NodeFilter filter = new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("id"));
        NodeList nodelist = parser.parse(filter);

        // NOTE(review): FileWriter encodes with the platform default charset
        // while the page is decoded as utf-8 - confirm the output encoding.
        try (FileWriter fw = new FileWriter("F://htmls/compare/1.txt", true);
             BufferedWriter bw = new BufferedWriter(fw)) {
            NodeIterator it = nodelist.elements();
            while (it.hasMoreNodes()) {
                String html = it.nextNode().toHtml();
                System.out.println(html);
                bw.write(html);
                bw.newLine();
            }
        } // closing the writer flushes everything that was buffered

        newurl = url;
        if (last.compare_html(oldurl, newurl)) {
            return true;
        }
        oldurl = newurl;
        return false;
    }

    /**
     * Decides whether the page at {@code path2} brings nothing new compared
     * with the page at {@code path}. Both pages are fetched over the network
     * on every call.
     *
     * <p>Only the {@code <div>} tags accepted by
     * {@code HasChildFilter(TagNameFilter("font"))} are taken into account, and
     * the two result lists are compared through their {@code toString()} form.
     *
     * @param path  address of the reference (previous) page
     * @param path2 address of the candidate (current) page
     * @return {@code true} if the candidate's node list renders as an empty
     *         string or renders identically to the reference's node list;
     *         {@code false} otherwise
     * @throws ParserException if either page cannot be fetched or parsed
     */
    public static boolean compare_html(String path, String path2) throws ParserException {
        NodeList nodelist = parseFontDivs(path);
        NodeList nodelist1 = parseFontDivs(path2);

        String candidate = nodelist1.toString();
        if (candidate.isEmpty()) {
            return true;
        }
        return candidate.equals(nodelist.toString());
    }

    /** Fetches {@code url} as utf-8 and returns its div tags matched by HasChildFilter on a font tag. */
    private static NodeList parseFontDivs(String url) throws ParserException {
        ConnectionManager manager = Page.getConnectionManager();
        Parser parser = new Parser(manager.openConnection(url));
        parser.setEncoding("utf-8");

        NodeFilter filter = new AndFilter(new TagNameFilter("div"), new HasChildFilter(new TagNameFilter("font")));
        return parser.parse(filter);
    }

}

通过 HtmlParser 分析网页信息,获取所需的信息,并保存到本地文本文件中。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值