3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefParser::FetchFiles;
25 # Given one or several FTP or HTTP URIs, download them. If an URI is
26 # for a file or MySQL connection, then these will be ignored. For
27 # FTP, standard shell file name globbing is allowed (but not regular
28 # expressions). HTTP does not allow file name globbing. The routine
29 # returns a list of successfully downloaded local files or an empty list
30 # if there was an error.
35 use Digest::MD5 qw(md5_hex);
37 use POSIX qw(strftime);
40 use File::Spec::Functions;
46 use Text::Glob qw( match_glob );
50 my $base_dir = File::Spec->curdir();
55 my $class = ref $proto || $proto;
56 return bless {}, $class;
60 my ($self, $arg_ref) = @_;
63 my $dest_dir = $arg_ref->{dest_dir};
64 my $user_uris = $arg_ref->{user_uris};
65 my $deletedownloaded = $arg_ref->{del_down};
66 my $checkdownload = $arg_ref->{chk_down};
67 my $verbose = $arg_ref->{verbose} ;
71 foreach my $user_uri (@$user_uris) {
72 # Change old-style 'LOCAL:' URIs into 'file:'.
73 $user_uri =~ s/^LOCAL:/file:/ix;
74 my $uri = URI->new($user_uri);
76 if ( $uri->scheme() eq
'script' ) {
77 push( @processed_files, $user_uri );
78 } elsif ( $uri->scheme() eq
'file' ) {
80 # Deal with local files.
84 push( @processed_files, $user_uri );
86 printf(
"==> Can not find file '%s' (or it is empty)\n",
90 } elsif ( $uri->scheme() eq
'ftp' ) {
91 # Deal with FTP files.
93 my $file_path = catfile( $dest_dir, basename( $uri->path() ) );
95 if ( $deletedownloaded && -e $file_path ) {
97 printf(
"Deleting '%s'\n", $file_path );
102 if ( $checkdownload && -s $file_path ) {
103 # The file is already there, no need to connect to a FTP
104 # server. This also means no file name globbing was
105 # used (for globbing FTP URIs, we always need to connect
106 # to a FTP site to see what files are there).
109 printf(
"File '%s' already exists\n", $file_path );
111 push( @processed_files, $file_path );
115 if ( -e $file_path ) { unlink($file_path) }
118 printf(
"Connecting to FTP host '%s' for file '%s' \n",
119 $uri->host(), $file_path );
122 my $ftp = $self->get_ftp($uri, 0);
123 if(!defined($ftp) or ! $ftp->can(
'ls') or !$ftp->ls()){
124 $ftp = $self->get_ftp($uri, 1);
126 foreach my $remote_file ( ( @{ $ftp->ls() } ) ) {
127 if ( !match_glob( basename( $uri->path() ), $remote_file ) ) {
131 $file_path = catfile( $dest_dir, basename($remote_file) );
133 if ( $deletedownloaded && -e $file_path ) {
135 printf(
"Deleting '%s'\n", $file_path );
140 if ( $checkdownload && -s $file_path ) {
142 printf(
"File '%s' already exists\n", $file_path );
146 if ( -e $file_path ) { unlink($file_path) }
148 if ( !-d dirname($file_path) ) {
150 printf(
"Creating directory '%s'\n",
151 dirname($file_path) );
153 if ( !mkdir( dirname($file_path) ) ) {
154 printf(
"==> Can not create directory '%s': %s",
155 dirname($file_path), $! );
161 printf(
"Fetching '%s' (size = %s)\n",
163 $ftp->size($remote_file) ||
'(unknown)' );
164 printf(
"Local file is '%s'\n", $file_path );
167 if ( !$ftp->get( $remote_file, $file_path ) ) {
168 printf(
"==> Could not get '%s': %s\n",
169 basename( $uri->path() ), $ftp->message() );
172 } ## end
else [
if ( $checkdownload &&...)]
174 if ( $file_path =~ /\.(gz|Z)$/x ) {
175 # Read from zcat pipe
177 my $cmd =
"gzip -t $file_path";
178 if ( system($cmd) != 0 ) {
179 printf(
"system command '%s' failed: %s - "
180 .
"Checking of gzip file failed - "
181 .
"FILE CORRUPTED ?\n\n",
184 if ( -e $file_path ) {
186 printf(
"Deleting '%s'\n", $file_path );
193 printf(
"'%s' passed (gzip -t) corruption test.\n",
198 push( @processed_files, $file_path );
200 } ## end
foreach my $remote_file ( (...))
201 if (!@processed_files) { printf (
"No files found matching $uri") ; }
204 } elsif ( $uri->scheme() eq
'http' || $uri->scheme eq
'https') {
205 # Deal with HTTP files.
207 my $filename = basename ($uri->path() );
208 if ($uri->path eq
'') { $filename =
"index.html"; }
210 my $file_path = catfile( $dest_dir, $filename );
212 if ( $deletedownloaded && -e $file_path ) {
214 printf(
"Deleting '%s'\n", $file_path );
219 if ( $checkdownload && -s $file_path ) {
220 # The file is already there, no need to connect to a
224 printf(
"File '%s' already exists\n", $file_path );
226 push( @processed_files, $file_path );
230 if ( -e $file_path ) { unlink($file_path) }
232 if ( !-d dirname($file_path) ) {
234 printf(
"Creating directory '%s'\n", dirname($file_path) );
236 if ( !mkdir( dirname($file_path) ) ) {
237 printf(
"==> Can not create directory '%s': %s",
238 dirname($file_path), $! );
244 printf(
"Connecting to HTTP host '%s'\n", $uri->host() );
245 printf(
"Fetching '%s'\n", $uri->path() );
248 if ( $checkdownload && -s $file_path ) {
250 printf(
"File '%s' already exists\n", $file_path );
255 printf(
"Local file is '%s'\n", $file_path );
258 if ( -e $file_path ) { unlink($file_path) }
260 open OUT,
">$file_path" or die
"Couldn't open file $file_path $!";
261 my $http = HTTP::Tiny->new();
263 my $response = $http->get($uri->as_string());
265 if ( !$response->{success} ) {
266 printf(
"==> Could not get '%s': %s\n",
267 basename( $uri->path() ), $response->{content} );
270 print OUT $response->{content};
274 push( @processed_files, $file_path );
276 } elsif ( $uri->scheme() eq
'mysql' ) {
277 # Just leave MySQL data untouched for now.
278 push( @processed_files, $user_uri );
280 printf(
"==> Unknown URI scheme '%s' in URI '%s'\n",
281 $uri->scheme(), $uri->as_string() );
284 } ## end
foreach my $user_uri (@user_uris)
286 return @processed_files;
287 } ## end sub fetch_files
291 my ($self, $uri, $passive) = @_;
295 $ftp = Net::FTP->new( $uri->host(),
'Debug' => 0, Passive => 1);
298 $ftp = Net::FTP->new( $uri->host(),
'Debug' => 0);
301 if ( !defined($ftp) ) {
302 printf(
"==> Can not open FTP connection: %s\n", $@ );
306 if ( !$ftp->login(
'anonymous',
'-anonymous@' ) ) {
307 printf(
"==> Can not log in on FTP host: %s\n",
312 if ( !$ftp->cwd( dirname( $uri->path() ) ) ) {
313 printf(
"== Can not change directory to '%s': %s\n",
314 dirname( $uri->path() ), $ftp->message() );