File: //usr/bin/X11/X11/X11/mech-dump
#!/usr/bin/perl
# PODNAME: mech-dump
# ABSTRACT: Dumps information about a web page
use warnings;
use strict;
use WWW::Mechanize ();
use Getopt::Long;
use Pod::Usage;
use HTTP::Cookies;
my @actions;
my $absolute;
my $all;
my $user;
my $pass;
my $agent;
my $agent_alias;
my $cookie_filename;
GetOptions(
    'user=s'        => \$user,
    'password=s'    => \$pass,
    headers         => sub { push( @actions, \&dump_headers ) },
    forms           => sub { push( @actions, \&dump_forms ) },
    links           => sub { push( @actions, \&dump_links ) },
    images          => sub { push( @actions, \&dump_images ) },
    all             => sub { $all++; push( @actions, \&dump_headers, \&dump_forms, \&dump_links, \&dump_images ) },
    text            => sub { push( @actions, \&dump_text ) },
    absolute        => \$absolute,
    'agent=s'       => \$agent,
    'agent-alias=s' => \$agent_alias,
    'cookie-file=s' => \$cookie_filename,
    help            => sub { pod2usage(1); },
    version         => sub { print STDERR $WWW::Mechanize::VERSION, "\n"; exit 0; },
) or pod2usage(2);
my @uris = @ARGV or die "Must specify a URL or file to check.  See --help for details.\n";
@actions = (\&dump_forms) unless @actions;
binmode(STDOUT, ':utf8');
my $mech = WWW::Mechanize->new( autocheck => 0 );
if ( defined $agent ) {
    $mech->agent( $agent );
}
elsif ( defined $agent_alias ) {
    $mech->agent_alias( $agent_alias );
}
if ( defined $cookie_filename ) {
    my $cookies = HTTP::Cookies->new( file => $cookie_filename, autosave => 1, ignore_discard => 1 );
    $cookies->load() ;
    $mech->cookie_jar($cookies);
}
else {
    $mech->cookie_jar(undef) ;
}
$mech->env_proxy();
foreach my $uri (@uris) {
  if ( -e $uri ) {
      require URI::file;
      $uri = URI::file->new_abs( $uri )->as_string;
  }
  my $response = $mech->get( $uri );
  if (!$response->is_success and defined ($response->www_authenticate)) {
      if (!defined $user or !defined $pass) {
          die("Page requires username and password, but none specified.\n");
      }
      $mech->credentials($user,$pass);
      $response = $mech->get( $uri );
      $response->is_success or die "Can't fetch $uri with username and password\n", $response->status_line, "\n";
  }
  $mech->is_html or die qq{$uri returns type "}, $mech->ct, qq{", not "text/html"\n};
  foreach my $action (@actions ) {
      $action->( $mech );
      print "\n" if @actions;
  }
}
sub dump_headers {
    my $mech = shift;
    print "--> Headers:\n" if $all;
    $mech->dump_headers( undef );
    return;
}
sub dump_forms {
    my $mech = shift;
    print "--> Forms:\n" if $all;
    $mech->dump_forms( undef );
    return;
}
sub dump_links {
    my $mech = shift;
    print "--> Links:\n" if $all;
    $mech->dump_links( undef, $absolute );
    return;
}
sub dump_images {
    my $mech = shift;
    print "--> Images:\n" if $all;
    $mech->dump_images( undef, $absolute );
    return;
}
sub dump_text {
    my $mech = shift;
    $mech->dump_text();
    return;
}
__END__
=pod
=encoding UTF-8
=head1 NAME
mech-dump - Dumps information about a web page
=head1 VERSION
version 2.06
=head1 SYNOPSIS
mech-dump [options] [file|url]
Options:
    --headers              Dump HTTP response headers
    --forms                Dump table of forms (default action)
    --links                Dump table of links
    --images               Dump table of images
    --all                  Dump all four of the above, in that order
    --text                 Dumps the textual part of the web page
    --user=user            Set the username
    --password=pass        Set the password
    --cookie-file=filename Set the filename to use for persistent cookies
    --agent=agent          Specify the UserAgent to pass
    --agent-alias=alias
                           Specify the alias for the UserAgent to pass.
                           Pick one of:
                               * Windows IE 6
                               * Windows Mozilla
                               * Mac Safari
                               * Mac Mozilla
                               * Linux Mozilla
                               * Linux Konqueror
    --absolute             Show URLs as absolute, even if relative in the page
    --help                 Show this message
The order of the options specified is relevant.  Repeated options
get repeated dumps.
Proxy settings are specified through the environment (e.g. C<http_proxy=http://proxy.my.place/>).
See LWP::UserAgent for details.
=head1 SEE ALSO
L<WWW::Mechanize>
=head1 AUTHOR
Andy Lester <andy at petdance.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2004 by Andy Lester.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut