Results 1 to 2 of 2
  1. #1
    Join Date
    Dec 2005
    Posts
    2

    Unanswered: URL only retrieved first time?

    Perl version 5:
    Windows XP.

    I am using the following code to retrieve a URL, parse the page for links, and convert the links to absolute URLs. The first time I call the code it retrieves the page, but each subsequent call does not. How can I modify the code to solve this problem?


    [code]
    # Fetch $url, collect the link attributes of every <img> tag, and print
    # them as absolute URLs, one per line.
    #
    # $url is expected to be set by the surrounding code, and url() is
    # presumably imported from URI::URL -- confirm both in the enclosing file.
    $ua1 = LWP::UserAgent->new;

    # Collect image links in a fresh array on every run.
    my @imgs = ();

    # The callback is an anonymous sub on purpose.  A named "sub callback"
    # is compiled only once, so when this code lives inside another sub
    # (a handler, a function called in a loop, a mod_perl registry script,
    # ...) it stays bound to the @imgs of the FIRST call.  Later calls would
    # then push into that stale array while this run's @imgs stays empty,
    # which looks as if the page were only retrieved the first time.  An
    # anonymous sub is re-created on each run and closes over the current
    # @imgs.
    my $collect_img = sub {
        my ($tag, %attr) = @_;
        return if $tag ne 'img';    # we only look closer at <img ...>
        push @imgs, values %attr;
        return;
    };

    # Make the parser.  Unfortunately, we don't know the base yet
    # (it might be different from $url).
    $p = HTML::LinkExtor->new($collect_img);

    # Request the document and parse it as it arrives.
    $res = $ua1->request(HTTP::Request->new(GET => $url),
                         sub { $p->parse($_[0]) });
    $p->eof;    # flush whatever the parser still has buffered

    # Report a failed fetch instead of silently printing nothing.
    warn "GET $url failed: ", $res->status_line, "\n" unless $res->is_success;

    # Expand all image URLs to absolute ones.
    my $base = $res->base;
    @imgs = map { url($_, $base)->abs } @imgs;

    # Print them out.
    print join("\n", @imgs), "\n";
    [/code]

  2. #2
    Join Date
    Jun 2004
    Location
    Nowhere Near You
    Posts
    89
    Multiple executions of this

    Code:
    #! /users/bin/perl 
    
    use strict;
    use warnings;
    use LWP::UserAgent;
    use HTML::LinkExtor;
    use URI;
    
       # Fetch $url, collect the link attributes of every <img> tag, and
       # print them as absolute URLs, one per line.
       my $ua1 = LWP::UserAgent->new;

       # Collected <img> link values; filled in by the parser callback.
       my @imgs = ();

       # An anonymous sub (rather than a named "sub callback") closes over
       # the @imgs that is current each time this code runs.  A named sub
       # would stay bound to the first @imgs if this code were ever moved
       # inside another sub or handler, so repeated calls would appear to
       # find nothing.
       my $collect_img = sub {
          my ($tag, %attr) = @_;
          return if $tag ne 'img';    # we only look closer at <img ...>
          push @imgs, values %attr;
          return;
       };

       my $url = "http://sigforum.com/groupee/forums/a/tpc/f/9666031561/m/12510103";

       # Make the parser.  Unfortunately, we don't know the base yet
       # (it might be different from $url).
       my $p = HTML::LinkExtor->new($collect_img);

       # Request the document and parse it as it arrives.
       my $res = $ua1->request(HTTP::Request->new(GET => $url),
                               sub { $p->parse($_[0]) });
       $p->eof;    # flush whatever the parser still has buffered

       # Report a failed fetch instead of silently printing nothing.
       warn "GET $url failed: ", $res->status_line, "\n" unless $res->is_success;

       # Expand all image URLs to absolute ones.  Resolve against the
       # response's base, not $url: after a redirect or with a <base href>
       # in the page the two differ, and $url would yield wrong links.
       my $base = $res->base;
       @imgs = map { URI->new_abs($_, $base) } @imgs;

       # Print them out.
       print join("\n", @imgs), "\n";
    work fine.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •