node2:/root/pachong/tongbanjie#cat test.pl
use LWP::UserAgent;
use POSIX;
use HTML::TreeBuilder::XPath;
use Encode;
use HTML::TreeBuilder;
use Data::Dumper;
use HTML::TreeBuilder::XPath;
use DBI;
use Encode;
# Credentials for the local MySQL server.
# NOTE(review): the password is hard-coded in the script; consider reading it
# from the environment or a config file instead.
my $user="root";
my $passwd="1234567";

# Connect to the `licai` database and abort with DBI's own error text on
# failure. `$DBI::errstr` is used because no handle exists when connect fails;
# the previous `DBI-errstr` was parsed as a subtraction of two barewords, so
# the actual connection error never appeared in the message.
my $dbh = DBI->connect("dbi:mysql:database=licai;host=127.0.0.1;port=3306",$user,$passwd)
    or die "can't connect to database " . $DBI::errstr;

# Tell the server that this session sends and expects UTF-8 text.
$dbh->do("SET NAMES utf8");

# HTTP client: 10-second timeout, proxy settings taken from the environment
# (*_proxy variables), and a custom User-Agent string.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

# XPath-capable HTML tree that the following parse_file call fills in.
my $tree= HTML::TreeBuilder::XPath->new;
$tree->parse_file("test.htm
perl 获取铜板街页码
本文介绍了如何利用Perl编程语言解析铜板街网站的HTML内容，提取出页面的分页数，从而实现对大量网页数据的自动化抓取。通过对HTTP请求的模拟和正则表达式的运用，详细阐述了具体实现步骤。
摘要由CSDN通过智能技术生成