From f141b014fa88900cce9516c05dc8de8d16adb471 Mon Sep 17 00:00:00 2001 From: Sebastian Riedel Date: Fri, 4 Apr 2025 15:50:27 +0200 Subject: [PATCH] Add downloads method to Mojo::File and Mojo::UserAgent::Transactor --- lib/Mojo/File.pm | 27 ++++++ lib/Mojo/UserAgent/Transactor.pm | 48 ++++++++++ lib/Mojolicious/Guides/Cookbook.pod | 8 ++ t/mojo/file_download.t | 131 ++++++++++++++++++++++++++++ t/mojo/transactor.t | 84 ++++++++++++++++++ 5 files changed, 298 insertions(+) create mode 100644 t/mojo/file_download.t diff --git a/lib/Mojo/File.pm b/lib/Mojo/File.pm index 5bfc9343c2..fe24e2276b 100644 --- a/lib/Mojo/File.pm +++ b/lib/Mojo/File.pm @@ -38,6 +38,14 @@ sub curfile { __PACKAGE__->new(Cwd::realpath((caller)[1])) } sub dirname { $_[0]->new(scalar File::Basename::dirname ${$_[0]}) } +sub download { + my ($self, $url, $options) = (shift, shift, shift // {}); + my $ua = $options->{ua} + || do { require Mojo::UserAgent; Mojo::UserAgent->new(max_redirects => 10, max_response_size => 0) }; + my $tx = _download_error($ua->transactor->download($ua->head($url => $options->{headers} // {}), $$self)); + return $tx ? !!_download_error($ua->start($tx)) : 1; +} + sub extname { shift->basename =~ /.+\.([^.]+)$/ ? $1 : '' } sub is_abs { file_name_is_absolute ${shift()} } @@ -177,6 +185,15 @@ sub touch { sub with_roles { shift->Mojo::Base::with_roles(@_) } +sub _download_error { + my $tx = shift; + + return $tx unless my $err = $tx->error; + return undef if $err->{message} eq 'Download complete' || $err->{message} eq 'Download incomplete'; + croak "$err->{code} response: $err->{message}" if $err->{code}; + croak "Download error: $err->{message}"; +} + 1; =encoding utf8 @@ -301,6 +318,16 @@ Return all but the last level of the path with L as a Ldirname; +=head2 download + + my $bool = $path->download('https://example.com/test.tar.gz'); + my $bool = $path->download('https://example.com/test.tar.gz', {headers => {Accept => '*/*'}}); + my $bool = $path->download('https://example.com/test.tar.gz', {ua => Mojo::UserAgent->new}); + +Download file from URL, returns true once the file has been downloaded completely. Incomplete downloads are resumed. +Follows C<10> redirects by default and does not limit the size of the response, which will be streamed memory +efficiently. Note that this method is B and might change without warning! + =head2 extname my $ext = $path->extname; diff --git a/lib/Mojo/UserAgent/Transactor.pm b/lib/Mojo/UserAgent/Transactor.pm index 177219b2a2..1297610224 100644 --- a/lib/Mojo/UserAgent/Transactor.pm +++ b/lib/Mojo/UserAgent/Transactor.pm @@ -20,6 +20,47 @@ has name => 'Mojolicious (Perl)'; sub add_generator { $_[0]->generators->{$_[1]} = $_[2] and return $_[0] } +sub download { + my ($self, $head, $path) = @_; + + my $req = $head->req; + my $tx = $self->tx(GET => $req->url->clone => $req->headers->to_hash); + my $res = $tx->res; + if (my $error = $head->error) { $res->error($error) and return $tx } + + my $headers = $head->res->headers; + my $accept_ranges = ($headers->accept_ranges // '') =~ /bytes/; + my $size = $headers->content_length // 0; + + my $current_size = 0; + my $file = path($path); + if (-f $file) { + $current_size = -s $file; + $res->error({message => 'Unknown file size'}) and return $tx unless $size; + $res->error({message => 'File size mismatch'}) and return $tx if $current_size > $size; + $res->error({message => 'Download complete'}) and return $tx if $current_size == $size; + $res->error({message => 'Server does not support partial requests'}) and return $tx unless $accept_ranges; + $tx->req->headers->range("bytes=$current_size-$size"); + } + + my $fh = $file->open('>>'); + $res->content->unsubscribe('read')->on( + read => sub { + my ($content, $bytes) = @_; + $current_size += length $bytes; + $fh->syswrite($bytes) == length $bytes or $res->error({message => qq/Can't write to file "$path": $!/}); + } + ); + $res->on( + finish => sub { + my $res = shift; + $res->error({message => 'Download incomplete'}) if $current_size < $size; + } + ); + + return $tx; +} + sub endpoint { my ($self, $tx) = @_; @@ -371,6 +412,13 @@ Register a content generator. $t->add_generator(foo => sub ($t, $tx, @args) {...}); +=head2 download + + my $tx = $t->download(Mojo::Transaction::HTTP->new, '/home/sri/test.tar.gz'); + +Build L resumable file download request as follow-up to a C request. Note that this +method is B and might change without warning! + =head2 endpoint my ($proto, $host, $port) = $t->endpoint(Mojo::Transaction::HTTP->new); diff --git a/lib/Mojolicious/Guides/Cookbook.pod b/lib/Mojolicious/Guides/Cookbook.pod index 2db4f34c45..a9fd89258b 100644 --- a/lib/Mojolicious/Guides/Cookbook.pod +++ b/lib/Mojolicious/Guides/Cookbook.pod @@ -1300,6 +1300,14 @@ L. my $tx = $ua->get('https://www.github.com/mojolicious/mojo/tarball/main'); $tx->result->save_to('mojo.tar.gz'); +With L you can also stream file downloads directly into the target file. This allows for +interrupted downloads to be resumed at a later time. + + use Mojo::File qw(path); + + # Download the latest Mojolicious tarball directly into a file + path('/home/sri/mojo.tar.gz')->download('https://www.github.com/mojolicious/mojo/tarball/main'); + To protect you from excessively large files there is also a limit of 2GiB by default, which you can tweak with the attribute L. diff --git a/t/mojo/file_download.t b/t/mojo/file_download.t new file mode 100644 index 0000000000..9cbad5c725 --- /dev/null +++ b/t/mojo/file_download.t @@ -0,0 +1,131 @@ +use Mojo::Base -strict; + +BEGIN { $ENV{MOJO_REACTOR} = 'Mojo::Reactor::Poll' } + +use Test::More; +use Mojo::File qw(tempdir); +use Mojo::UserAgent; +use Mojolicious::Lite; + +# Silence +app->log->level('trace')->unsubscribe('message'); + +get '/simple' => sub { + my $c = shift; + $c->res->headers->accept_ranges('bytes'); + return $c->render(data => 'CHUNK1CHUNK2CHUNK3'); +}; + +get '/redirect' => sub { + my $c = shift; + return $c->redirect_to('/simple'); +}; + +get '/resume' => sub { + my $c = shift; + $c->res->headers->accept_ranges('bytes'); + return $c->render(data => 'CHUNK1CHUNK2CHUNK3') if $c->req->method eq 'HEAD'; + my $range = $c->req->headers->range; + return $c->write('CHUNK1')->finish unless $range; + return $c->write('CHUNK2')->finish if $range eq 'bytes=6-18'; + return $c->write('CHUNK3')->finish; +}; + +get '/stream' => sub { + my $c = shift; + my $chunks = [qw(CHUNK1 CHUNK2 CHUNK3 CHUNK4)]; + $c->res->code(200); + $c->res->headers->content_type('text/plain'); + my $cb = sub { + my $content = shift; + my $chunk = shift @$chunks || ''; + $content->write_chunk($chunk, $chunk ? __SUB__ : undef); + }; + $c->res->content->$cb; + $c->rendered; +}; + +get '/header' => sub { + my $c = shift; + my $custom = $c->req->headers->header('X-Custom-Header') || 'MISSINGHEADER'; + $c->res->headers->accept_ranges('bytes'); + return $c->render(data => $custom); +}; + +my $dir = tempdir; +my $ua = Mojo::UserAgent->new; + +subtest 'Basic file download' => sub { + my $file = $dir->child('simple1.txt'); + ok $file->download('/simple'), 'file downloaded'; + ok -e $file, 'file exists'; + is $file->slurp, 'CHUNK1CHUNK2CHUNK3', 'right content'; +}; + +subtest 'File already downloaded' => sub { + my $file = $dir->child('simple1.txt'); + is $file->slurp, 'CHUNK1CHUNK2CHUNK3', 'right content'; + ok $file->download('/simple'), 'file downloaded'; + is $file->slurp, 'CHUNK1CHUNK2CHUNK3', 'right content'; +}; + +subtest 'Basic file download (custom header)' => sub { + my $file = $dir->child('header1.txt'); + ok $file->download('/header', {headers => {'X-Custom-Header' => 'CORRECTFILECONTENT'}}), 'file downloaded'; + ok -e $file, 'file exists'; + is $file->slurp, 'CORRECTFILECONTENT', 'right content'; + + my $file2 = $dir->child('header2.txt'); + ok $file2->download('/header'), 'file downloaded'; + ok -e $file2, 'file exists'; + is $file2->slurp, 'MISSINGHEADER', 'right content'; +}; + +subtest 'Basic file download (redirect)' => sub { + my $file = $dir->child('redirect1.txt'); + ok $file->download('/redirect'), 'file downloaded'; + ok -e $file, 'file exists'; + is $file->slurp, 'CHUNK1CHUNK2CHUNK3', 'right content'; + + my $ua2 = Mojo::UserAgent->new(max_redirects => 0); + my $file2 = $dir->child('redirect2.txt'); + ok $file2->download('/redirect', {ua => $ua2}), 'file downloaded'; + ok -e $file2, 'file exists'; + is $file2->slurp, '', 'right content'; +}; + +subtest 'Existing file is larger' => sub { + my $file = $dir->child('simple2.txt')->spew('CHUNK1CHUNK2CHUNK3CHUNK4'); + eval { $file->download('/simple') }; + like $@, qr/Download error: File size mismatch/, 'right error'; +}; + +subtest 'Resumed file download' => sub { + my $file = $dir->child('resume1.txt'); + ok !$file->download('/resume'), 'file partially downloaded'; + ok !$file->download('/resume'), 'file continued'; + ok $file->download('/resume'), 'file downloaded'; + ok -e $file, 'file exists'; + is $file->slurp, 'CHUNK1CHUNK2CHUNK3', 'right content'; +}; + +subtest 'Missing file' => sub { + my $file = $dir->child('missing1.txt'); + eval { $file->download('/missing') }; + like $@, qr/404 response: Not Found/, 'file not found'; +}; + +subtest 'File of unknown size' => sub { + my $file = $dir->child('stream1.txt')->spew('C'); + eval { $file->download('/stream') }; + like $@, qr/Download error: Unknown file size/, 'right error'; +}; + +subtest 'File of unknown size downloaded' => sub { + my $file = $dir->child('stream2.txt'); + ok $file->download('/stream'), 'file downloaded'; + ok -e $file, 'file exists'; + is $file->slurp, 'CHUNK1CHUNK2CHUNK3CHUNK4', 'right content'; +}; + +done_testing(); diff --git a/t/mojo/transactor.t b/t/mojo/transactor.t index 2665ba3ac4..193dd3363d 100644 --- a/t/mojo/transactor.t +++ b/t/mojo/transactor.t @@ -3,6 +3,7 @@ use Mojo::Base -strict; use Test::More; use Mojo::Asset::File; use Mojo::Asset::Memory; +use Mojo::File qw(tempdir); use Mojo::Promise; use Mojo::Transaction::WebSocket; use Mojo::URL; @@ -1016,6 +1017,89 @@ subtest '301 redirect without compression' => sub { is $tx->res->headers->location, undef, 'no "Location" value'; }; +subtest 'Download' => sub { + my $dir = tempdir; + my $no_file = $dir->child('no_file'); + my $small_file = $dir->child('small_file')->spew('x'); + my $large_file = $dir->child('large_file')->spew('xxxxxxxxxxx'); + my $correct_file = $dir->child('correct_file')->spew('xxxxxxxxxx'); + my $t = Mojo::UserAgent::Transactor->new; + + subtest 'Partial file exists' => sub { + my $head = $t->tx(HEAD => 'http://mojolicious.org/release.tar.gz'); + $head->res->headers->content_length(10); + $head->res->headers->accept_ranges('bytes'); + my $tx = $t->download($head, $small_file); + is $tx->req->method, 'GET', 'right method'; + is $tx->req->url->to_abs, 'http://mojolicious.org/release.tar.gz', 'right URL'; + is $tx->req->headers->range, 'bytes=1-10', 'right "Range" value'; + }; + + subtest 'Partial file exists (with headers)' => sub { + my $head = $t->tx(HEAD => 'http://mojolicious.org/release.tar.gz' => {Accept => 'application/json'}); + $head->res->headers->content_length(10); + $head->res->headers->accept_ranges('bytes'); + my $tx = $t->download($head, $small_file); + is $tx->req->method, 'GET', 'right method'; + is $tx->req->url->to_abs, 'http://mojolicious.org/release.tar.gz', 'right URL'; + is $tx->req->headers->range, 'bytes=1-10', 'right "Range" value'; + is $tx->req->headers->accept, 'application/json', 'right "Accept" value'; + }; + + subtest 'Failed HEAD request' => sub { + my $head = $t->tx(HEAD => 'http://mojolicious.org/release.tar.gz'); + $head->res->error({message => 'Failed to connect'}); + my $tx = $t->download($head, $no_file); + is $tx->error->{message}, 'Failed to connect', 'right error'; + }; + + subtest 'Empty HEAD response' => sub { + my $head = $t->tx(HEAD => 'http://mojolicious.org/release.tar.gz'); + my $tx = $t->download($head, $no_file); + is $tx->req->method, 'GET', 'right method'; + is $tx->req->url->to_abs, 'http://mojolicious.org/release.tar.gz', 'right URL'; + is $tx->req->headers->range, undef, 'no "Range" value'; + }; + + subtest 'Empty HEAD response (file exists)' => sub { + my $head = $t->tx(HEAD => 'http://mojolicious.org/release.tar.gz'); + my $tx = $t->download($head, $small_file); + is $tx->error->{message}, 'Unknown file size', 'right error'; + }; + + subtest 'Target file does not exist' => sub { + my $head = $t->tx(HEAD => 'http://mojolicious.org/release.tar.gz'); + $head->res->headers->content_length(10); + my $tx = $t->download($head, $no_file); + is $tx->req->method, 'GET', 'right method'; + is $tx->req->url->to_abs, 'http://mojolicious.org/release.tar.gz', 'right URL'; + is $tx->req->headers->range, undef, 'no "Range" value'; + }; + + subtest 'Partial file exists (unsupported server)' => sub { + my $head = $t->tx(HEAD => 'http://mojolicious.org/release.tar.gz'); + $head->res->headers->content_length(10); + my $tx = $t->download($head, $small_file); + is $tx->error->{message}, 'Server does not support partial requests', 'right error'; + }; + + subtest 'Partial file exists (larger than download)' => sub { + my $head = $t->tx(HEAD => 'http://mojolicious.org/release.tar.gz'); + $head->res->headers->content_length(10); + $head->res->headers->accept_ranges('bytes'); + my $tx = $t->download($head, $large_file); + is $tx->error->{message}, 'File size mismatch', 'right error'; + }; + + subtest 'Download already finished' => sub { + my $head = $t->tx(HEAD => 'http://mojolicious.org/release.tar.gz'); + $head->res->headers->content_length(10); + $head->res->headers->accept_ranges('bytes'); + my $tx = $t->download($head, $correct_file); + is $tx->error->{message}, 'Download complete', 'right error'; + }; +}; + subtest 'Promisify' => sub { my $promise = Mojo::Promise->new; my (@results, @errors);