一个简单的多的socket http 下载原型 perl

基于perl,使用IO::Select实现,并非多线程。可指定分几部分下载。
基本上没有作异常处理,没有处理redirect,甚至也没有判断对range头的响应是否为206.

还好的是它还可以工作,比wget快几倍地下载,挺好玩的.

perl module:

package HttpClient;

use strict;
use warnings;

use IO::Socket::INET;
use Data::Dumper;

my $crlf = “/r/n”;

my $buf_size = 8 * 1024;

sub new {
my $class = shift;
my %cnf = (@_);
my $self = {
state => ‘init’,
url => $cnf{url},
‘total_parts’ => $cnf{’total_parts’},
part => $cnf{part},
‘content_length’ => $cnf{’content_length’},
};
my $url = $self->{url};
my $host = $1 if $url =~ m{://([^/]*)};
my $file = $1 if $url =~ m{/([^/]*)$};
if ( defined $self->{part} ) {
$file .= “.part” . $self->{part};
}
$self->{host} = $host;
$self->{file} = $file;
my $port = 80;
$port = $1 if $host =~ /:(/d+)/;
my $sock = IO::Socket::INET->new(
PeerAddr => $host,
PeerPort => $port,
Proto => ‘tcp’,
Blocking => 0,
)
or die “can’t connect to server:$!/n”;
select($sock);
$| = 1;
select(STDOUT);
$self->{sock} = $sock;
bless $self, $class;
return $self;
}

sub sock {
return shift->{sock};
}

sub get_request_header {
my $self = shift;
return $self->{request} if defined $self->{request};
my $request =
“GET $self->{url} HTTP/1.1$crlf”
. “Host: $self->{host}$crlf”
. “Connection: close$crlf”;
if ( defined $self->{’total_parts’}
and defined $self->{part}
and defined $self->{’content_length’} )
{
my $length = $self->{’content_length’};
my $total_parts = $self->{’total_parts’};
my $part = $self->{part};
my $part_size = int( $length / $total_parts );
my $start_pos = $part_size * $part;
my $recved = 0;
if (-e $self->{file}) {
$recved = -s $self->{file};
$start_pos+=$recved;
}

my $recv_size =
( $part == $total_parts - 1 ) ? $length-$part*$part_size : $part_size;
$self->{start_pos} = $start_pos;
$self->{recv_size} = $recv_size-$recved;
print “part $self->{part} recv_size=$self->{recv_size},start_pos=$start_pos,recved=$recved,parts=$total_parts,length=$length/n”;
$request .=
“Range: bytes=$start_pos-” . ( $start_pos + $recv_size-1 ) . $crlf;
}
$request .= $crlf;
$self->{request} = $request;
return $request;
}

sub parse_header {
my ($self) = @_;
my $data = $self->{data};
return 1 if $self->{state} =~ /body/;
return 0 unless defined $data;
return 0 unless $data =~ m{^(.*?)(/r/n/r/n|/n/n)}s;
my $header_content = $1;
my $header_end = $2;
print $header_content, “/n”;
my @headers = split //r?/n/, $header_content;
die “invalid header/n” unless scalar(@headers) > 0;
my $status_line = shift @headers;
$self->{status_line} = $status_line;
$self->{code} = $2 if $status_line =~ m{HTTP/1(.1)? (/d+)};
my $last_header;
my $header = {};

foreach my $line (@headers) {
if ( $line =~ /^/s+(.*)$/ ) {
$header->{$last_header} .= ” $1″;
}
elsif ( $line =~ /^([^:]+): (.*)$/ ) {
$last_header = $1;
my $value = $2;
$header->{$last_header} = $value;
}
else {
print “invalid header:$line/n”;
}
}
$self->{header} = $header;
$self->{’content_length’} = $header->{’Content-Length’}
unless defined $self->{’content_length’};
$self->{recv_size} = $self->{’content_length’};
$self->{data} = substr($data,length($header_content)+length($header_end));
$self->{state} = ‘body’;
return 1;
}

sub recv_data {
my ( $self, $data ) = @_;
if ( defined $self->{data} ) {
$self->{data} .= $data;
}
else {
$self->{data} = $data;
}
}

sub save_data {
my ( $self, $read_select ) = @_;
my $fh = $self->{fh};
if ( !defined $fh ) {
open $fh, “>$self->{file}” or die “can’t open file $self->{file} :$!/n”;
binmode $fh,”:bytes”;
$self->{fh} = $fh;
}
my $write_len = $self->{write_len} || 0;
my $recv_size = $self->{recv_size};
my $data = $self->{data};
my $max_len = length($data);
return unless $max_len > 0;
if ($max_len+$write_len > $recv_size) {
$max_len = $recv_size - $write_len ;
my $part = $self->{part} || 0;
print “part=$part,max_len=$max_len,write_len=$write_len/n”;
}

if ( $max_len == 0 ) {
$self->{done} = 1;
close $self->{fh};
$read_select->remove( $self->sock );
close $self->{sock};
print “$self->{file} recved $write_len bytes/n”;
$self->{parent}->child_done($self) if $self->{parent};
return;
}
my $len = syswrite( $fh, $data, $max_len )
or die “write data failed :$!/n”;
$self->{data} = substr( $data, $len );
$write_len += $len;
$self->{write_len} = $write_len;
}

sub child_done {
my ( $self, $child ) = @_;
$child->{done} = 1;
return unless $self->{done};
foreach my $c ( @{ $self->{children} } ) {
return unless $c->{done};
}

print “merge file/n”;
open FH, “>>$self->{file}”;
print “first part size:”,-s $self->{file},”/n”;
seek( FH, 0, 2 );
foreach my $c ( @{ $self->{children} } ) {
print “$c->{file} size:”,-s $c->{file},”/n”;
open CFH, “<$c->{file}”;

print “merge $c->{file}/n”;
my $buf;
for ( ; ; ) {
my $len = sysread( CFH, $buf, $buf_size );
last if !defined $len || $len == 0;
syswrite( FH, $buf, $len );
}
close CFH;
unlink $c->{file};
}
close FH;
}

sub handle_read {
my ( $self, $sock, $read_select, $write_select, $sock_client ) = @_;

my $data;
my $len = sysread( $sock, $data, $buf_size );
if ( $len == 0 ) {
print “sock $sock finished/n”;
$read_select->remove($sock);
close $sock;
print “$self->{file} size=”,-s $self->{file},”/n”;
$self->{parent}->child_done($self) if $self->{parent};
return;
}
$self->recv_data($data);
if ( $self->{state} !~ /body/ and $self->parse_header ) {
if ( !defined $self->{parent} and defined $self->{content_length} ) {
my $parts = $self->{total_parts} || 5;
$self->{children} = [];
my $length = $self->{’content_length’};
my $part_size = int( $length / $parts );
$self->{recv_size} = $part_size;
print “parent recv_size=$self->{recv_size}/n”;
foreach my $part ( 1 .. $parts - 1 ) {
my $child = HttpClient->new(
url => $self->{url},
‘total_parts’ => $parts,
part => $part,
‘content_length’ => $self->{content_length},
);
$sock_client->{ $child->sock } = $child;
$child->{parent} = $self;
push @{ $self->{children} }, $child;
$read_select->add( $child->sock );
$write_select->add( $child->sock );
}
}
}
else {
$self->save_data($read_select);
}
}

sub handle_write {
my ( $self, $sock, $read_select, $write_select, $sock_client ) = @_;

my $offset = 0;
$offset = $self->{request_offset} if defined $self->{request_offset};
my $request = $self->get_request_header;
if ( $offset == 0 ) {
print “try to send request/n”;
print $request;
}
print “offset=$offset/n”;
my $len = syswrite( $sock, $request, length($request) - $offset, $offset );
if ( !defined $len ) {
print STDERR “write failed:$!/n”;
$read_select->remove($sock);
$write_select->remove($sock);
}
else {
$offset += $len;
$self->{request_offset} = $offset;
if ( $offset == length($request) ) {
$write_select->remove($sock);
}
}
}

sub start {
my ($self) = @_;
use IO::Select;
my $r = IO::Select->new;
$r->add( $self->sock );

my $w = IO::Select->new;
$w->add( $self->sock );

my $sock_client = { $self->sock => $self };

use Time::HiRes qw(time);
my $start_time = time;
for ( ; ; ) {
last if ( $r->count == 0 );
my ( $rout, $wout, $eout ) = IO::Select->select( $r, $w, $r );
last unless defined $rout;

foreach my $sock ( @{$wout} ) {
my $c = $sock_client->{$sock};
if ( !defined $c ) {
die “oops,can’t find httpclient for $sock/n”;
}
$c->handle_write( $sock, $r, $w, $sock_client );
}
foreach my $sock ( @{$rout} ) {
my $c = $sock_client->{$sock};
if ( !defined $c ) {
die “oops,can’t find httpclient for $sock/n”;
}
$c->handle_read( $sock, $r, $w, $sock_client );
}
}
my $end_time = time;
my $used_time = $end_time - $start_time;
my $speed = $self->{content_length} / $used_time;
print “Done,spend $used_time seconds,speed:$speed bytes/seconds/n”;
}

1;

test perl script:

#!/usr/bin/perl
use strict;
use warnings;

use lib ‘.’;

use HttpClient;
use Getopt::Long;
$| = 1;
#my $url = ‘http://eclipse.cdpa.nsysu.edu.tw/downloads/drops/R-3.2.1-200609210945/eclipse-SDK-3.2.1-linux-gtk.tar.gz’;

my $url = ”;
my $total_parts = 1;
my $result = GetOptions (”url|u=s” => /$url,
                        “parts|p=i”   => /$total_parts,
                        );
unless ($result and $url=~m{://}) {
    print <<HELP
usage: perl http.pl –url=url [–parts=parts]
HELP
;
    exit;
}

my $client = HttpClient->new(url=>$url,’total_parts’=>$total_parts);
$client->start();

参考:

RFC2616 - HTTP/1.1 Specification

technorati tags:perl, http, downloader, protocol

Blogged with Flock

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值