#!/usr/bin/perl
use strict;
use warnings;

# speedometer: for each URL given on the command line, fetch the page plus
# the resources it embeds (images, scripts, frames, backgrounds, ...), then
# print the total number of bytes and an estimated download time at several
# connection speeds.

$| = 1;    # unbuffered STDOUT, so ERROR lines show up as they happen

use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Request::Common;
use HTML::LinkExtor;
use URI;

# Tags and attributes whose URLs are fetched as part of the page.
# subset of %HTML::Tagset::linkElements
my %LINKS = (
    'applet'  => ['archive', 'codebase', 'code'],
    'bgsound' => ['src'],
    'body'    => ['background'],
    'embed'   => ['src'],
    'frame'   => ['src'],
    'iframe'  => ['src'],
    'ilayer'  => ['background'],
    'img'     => ['src', 'lowsrc'],
    'input'   => ['src'],
    'layer'   => ['background', 'src'],
    'script'  => ['src'],
    'table'   => ['background'],
    'td'      => ['background'],
    'th'      => ['background'],
    'tr'      => ['background'],
);

my $ua = LWP::UserAgent->new;
$ua->env_proxy;
$ua->agent("speedometer/1.00 " . $ua->agent);    # identify ourselves
$ua->cookie_jar(HTTP::Cookies->new);             # capture cookies if needed

report($_) for @ARGV;

exit 0;

# report($start)
#   Fetches $start and everything it embeds, then prints the size summary
#   for that page to STDOUT.
sub report {
    my $start = shift;

    my $size_of = _fetch_sizes($start);
    _print_summary($start, $size_of);
    return;
}

# _fetch_sizes($start)
#   Walks the work queue starting at $start and returns a hash ref mapping
#   each successfully fetched (or redirecting) URL to its body length in
#   bytes. Failed requests are reported on STDOUT and not counted.
sub _fetch_sizes {
    my ($start) = @_;

    my @todo = (["", $start]);    # queue of [referring URL, URL to fetch]
    my %size_of;                  # URL => bytes, for URLs that count
    my %attempted;                # every URL already requested, even failures

    while (@todo) {
        my ($refer, $url) = @{ shift @todo };

        # Request each URL once, including ones that failed, so a broken
        # resource referenced many times is fetched and reported only once.
        next if $attempted{$url}++;

        # GET takes a flat Header => Value list; the start page has no
        # referrer, so send no Referer header at all in that case.
        my @headers = length($refer) ? (Referer => "$refer") : ();

        # simple_request does not follow redirects itself; they are handled
        # below so the redirect response's own bytes can be counted.
        my $response = $ua->simple_request(GET($url, @headers));

        if ($response->is_success) {
            my $content = $response->content;
            $size_of{$url} = length $content;
            next if $response->content_type ne "text/html";

            # relative URLs in the page are resolved against this base
            my $base   = $response->base;
            my $parser = HTML::LinkExtor->new(undef, $base)
                or die "cannot create HTML::LinkExtor parser\n";
            $parser->parse($content);
            $parser->eof;

            for my $link ($parser->links) {
                my ($tag, %attr) = @$link;
                my $wanted = $LINKS{$tag} or next;
                for my $name (@$wanted) {
                    my $target = $attr{$name};
                    next unless defined $target && length $target;
                    push @todo, [$base, $target];
                }
            }
        }
        elsif ($response->is_redirect) {
            $size_of{$url} = length $response->content;    # this counts
            my $location = $response->header('Location') or next;

            # Location may be relative; make it absolute before queueing,
            # otherwise the follow-up request has no scheme or host.
            push @todo, [$url, URI->new_abs($location, $response->base)];
        }
        elsif ($response->is_error) {
            print "$url ERROR: ", $response->status_line, "\n";
        }
    }

    return \%size_of;
}

# _print_summary($start, $size_of)
#   Prints the report for one start page: total bytes, estimated transfer
#   time at each rate, and the largest items (at most $limit of them).
sub _print_summary {
    my ($start, $size_of) = @_;

    my $limit = 6;    # how many of the largest items to list

    # Label printed in the report => divisor applied to the byte total.
    # The divisors equal label * 1000 / 8 except 56.0 => 7000; presumably
    # these are bytes per second at kbit/s line speeds -- TODO confirm.
    # NOTE(review): "36.6" may have been meant as 33.6; left as in the
    # original output.
    my @rates = (
        ['28.8',  3600],
        ['36.6',  4575],
        ['56.0',  7000],
        ['128.0', 16000],
        ['256.0', 32000],
    );

    my @by_size = sort { $size_of->{$b} <=> $size_of->{$a} } keys %$size_of;
    my $elements = @by_size;

    my $total = 0;
    $total += $_ for values %$size_of;

    my $shown  = $elements < $limit ? $elements : $limit;
    my $buffer = join '',
        map { sprintf(" %7d %s\n", $size_of->{$_}, $_) }
        @by_size[0 .. $shown - 1];

    my $rule = "-" x 60;

    print $rule;
    print "\nHTML Page: $start\n";
    print $rule;
    print "\n\n" . commify($total)
        . " TOTAL BYTES DOWNLOADED ($elements items total)\n\n";

    for my $rate (@rates) {
        my ($label, $bytes_per_second) = @$rate;
        printf "%7.2f seconds at %s\n", $total / $bytes_per_second, $label;
    }

    print "\n";
    print $shown;
    print " largest elements of the page with sizes in bytes\n";
    print $rule;
    print "\n$buffer\n";
    return;
}

# commify($number)
#   Returns $number as a string with a comma between each group of three
#   digits of the integer part (1234567 -> "1,234,567").
sub commify {
    my $input = shift;

    # Work on the reversed string so groups of three are counted from the
    # right; the negative lookahead skips digits that still have a decimal
    # point ahead of them in the reversed string.
    my $reversed = reverse $input;
    $reversed =~ s<(\d\d\d)(?=\d)(?!\d*\.)><$1,>g;
    return scalar reverse $reversed;
}