perl 获取网页内容
2009-05-31 17:22
一、使用 LWP::Simple 模块获取网页内容的代码:
#!/usr/bin/perl
# Demonstrates the three basic LWP::Simple helpers for fetching a URL:
#   get()      - returns the document body as a string (undef on failure)
#   getprint() - prints the document to STDOUT and returns the HTTP status code
#   getstore() - saves the document to a file and returns the HTTP status code
# A failure in one step is reported on STDERR and the script carries on, so
# every helper is still exercised.
use strict;
use warnings;
use LWP::Simple;

# The surrounding blanks of the original literal were removed: a URL must not
# carry leading/trailing whitespace, and they leaked into diagnostics.
my $url = "http://www.test.com";

# get() yields undef when the request fails; guard before interpolating so we
# do not trigger an "uninitialized value" warning and silently print nothing.
my $page = get($url);
if (defined $page) {
    print("\n $page \n \n");
}
else {
    warn "get() could not fetch $url\n";
}

# getprint() returns the numeric HTTP status; is_success() (re-exported by
# LWP::Simple from HTTP::Status) tells whether it is a 2xx code.
my $status = getprint($url);
print("\n\n $status\n");
warn "getprint() failed for $url with status $status\n"
    unless is_success($status);

# getstore() writes the body to page.txt and likewise returns the status code.
$status = getstore($url, "page.txt");
print("\n $status \n");
warn "getstore() failed for $url with status $status\n"
    unless is_success($status);
二、HTML解析: HTML::TokeParser模块
#!/usr/bin/perl
2 use strict; 3 use warnings; 4 use LWP::UserAgent; 5 use HTML::TokeParser; 6 my $url=" http://www.test.com "; 7 my $agent=new LWP::UserAgent(); 8 my $request=new HTTP::Request('GET'=>$url); 9 my $response=$agent->request($request); 10 my $document=$response->content(); 11 12 my $page=HTML::TokeParser->new (\$document); 13 while (my $token=$page->get_token()){ 14 my $type=shift(@{$token}); 15 my $text=shift(@{$token}); 16 if ($type eq "T"){ 17 print ("$text"); 18 } |