3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package Bio::EnsEMBL::Utils::IO;
27 Please email comments or questions to the
public Ensembl
28 developers list at <http:
30 Questions may also be sent to the Ensembl help desk at
45 # use Bio::EnsEMBL::Utils::IO qw/:slurp/; # brings in any method starting with slurp
46 # use Bio::EnsEMBL::Utils::IO qw/:array/; # brings in any method which ends with _array
47 # use Bio::EnsEMBL::Utils::IO qw/:gz/; # brings all methods which start with gz_
48 # use Bio::EnsEMBL::Utils::IO qw/:bz/; # brings all methods which start with bz_
49 # use Bio::EnsEMBL::Utils::IO qw/:zip/; # brings all methods which start with zip_
50 # use Bio::EnsEMBL::Utils::IO qw/:all/; # brings all methods in
53 my $file_contents = slurp(
'/my/file/location.txt');
54 print length($file_contents);
57 my $file_contents_ref = slurp(
'/my/file/location.txt', 1);
58 print length($$file_contents_ref);
60 # Sending it to an array
61 my $array = slurp_to_array(
'/my/location');
62 work_with_file(
'/my/location',
'r', sub {
63 $array = process_to_array($_[0], sub {
64 #Gives us input line by line
69 # Simplified version but without the post processing
70 $array = fh_to_array($fh);
72 # Sending this back out to another file
73 work_with_file(
'/my/file/newlocation.txt',
'w', sub {
75 print $fh $$file_contents_ref;
79 # Gzipping the data to another file
80 gz_work_with_file(
'/my/file.gz',
'w', sub {
82 print $fh $$file_contents_ref;
86 # Working with a set of lines manually
87 work_with_file(
'/my/file',
'r', sub {
89 iterate_lines($fh, sub {
91 print $line; #Send the line in the file back out
97 # Doing the same in one go
98 iterate_file(
'/my/file', sub {
100 print $line; #Send the line in the file back out
104 # Move all data from one file handle to another. Bit like a copy
105 move_data($src_fh, $trg_fh);
109 A collection of subroutines aimed at helping with IO-based operations
128 use base qw(Exporter);
134 our @EXPORT_OK = qw/slurp slurp_to_array fh_to_array process_to_array work_with_file gz_slurp gz_slurp_to_array gz_work_with_file bz_slurp bz_slurp_to_array bz_work_with_file zip_slurp zip_slurp_to_array zip_work_with_file spurt filter_dir iterate_file iterate_lines move_data/;
137 slurp => [qw/slurp slurp_to_array gz_slurp gz_slurp_to_array/],
138 spurt => [qw/spurt/],
139 array => [qw/fh_to_array process_to_array slurp_to_array gz_slurp_to_array/],
140 gz => [qw/gz_slurp gz_slurp_to_array gz_work_with_file/],
141 bz => [qw/bz_slurp bz_slurp_to_array bz_work_with_file/],
142 zip => [qw/zip_slurp zip_slurp_to_array zip_work_with_file/],
143 iterate => [qw/iterate_file iterate_lines/],
150 require IO::Compress::Gzip;
151 require IO::Uncompress::Gunzip;
156 require IO::Compress::Bzip2;
157 require IO::Uncompress::Bunzip2;
162 require IO::Compress::Zip;
163 require IO::Uncompress::Unzip;
169 Arg [1] :
string $file
170 Arg [2] :
boolean; $want_ref
171 Arg [3] :
boolean; $binary
172 Arg [2] indicates if we want to return a scalar reference; Arg [3] switches the file handle to binmode
173 Description : Forces the contents of a file into a scalar. This is the
174 fastest way to get a file into memory in Perl. You can also
175 get a scalar reference back to avoid copying the file contents
176 in Scalar references. If the input file is binary then specify
178 Returntype : Scalar or reference of the file contents depending on arg 2
179 Example : my $contents = slurp('/tmp/file.txt');
180 Exceptions : If the file did not exist or was not readable
186 my ($file, $want_ref, $binary) = @_;
188 work_with_file($file,
'r', sub {
190 binmode($fh)
if $binary;
191 my $size_left = -s $file;
192 while( $size_left > 0 ) {
193 my $read_cnt = sysread($fh, $contents, $size_left, length($contents));
194 unless( $read_cnt ) {
195 throw "read error in file $file: $!" ;
198 $size_left -= $read_cnt ;
202 return ($want_ref) ? \$contents : $contents;
207 Arg [1] :
string $file
208 Arg [2] :
string $contents
209 Arg [3] :
boolean; $append
210 Arg [4] :
boolean; $binary
211 Description : Convenient method to safely open a file and dump some content into it.
212 $append can be set to append to the file instead of resetting it first.
213 $binary can be set if the content you are printing is not plain-text.
215 Example : spurt('/tmp/file.txt', $contents);
216 Exceptions : If the file could not be created or was not writable
222 my ( $file, $contents, $append, $binary ) = @_;
228 binmode($fh)
if $binary;
229 syswrite( $fh, $contents );
235 Arg [1] :
string $file
236 Arg [2] : boolean; $want_ref Indicates
if we want to
return a scalar reference
237 Arg [3] : boolean; $binary
238 Arg [4] : HashRef arguments to pass into IO compression layers
239 Description : Forces the contents of a file into a scalar. This is the
240 fastest way to get a file into memory in Perl. You can also
241 get a scalar reference back to avoid copying the file contents
242 in Scalar references. If the input file is binary then specify
244 Returntype : Scalar or reference of the file contents depending on arg 2
245 Example : my $contents = gz_slurp('/tmp/file.txt.gz');
246 Exceptions : If the file did not exist or was not readable
252 my ($file, $want_ref, $binary, $args) = @_;
254 gz_work_with_file($file,
'r', sub {
257 binmode($fh)
if $binary;
261 return ($want_ref) ? \$contents : $contents;
266 Arg [1] :
string $file
267 Arg [2] : boolean; $want_ref Indicates
if we want to
return a scalar reference
268 Arg [3] : boolean; $binary
269 Arg [4] : HashRef arguments to pass into IO compression layers
270 Description : Forces the contents of a file into a scalar. This is the
271 fastest way to get a file into memory in Perl. You can also
272 get a scalar reference back to avoid copying the file contents
273 in Scalar references. If the input file is binary then specify
275 Returntype : Scalar or reference of the file contents depending on arg 2
276 Example : my $contents = bz_slurp('/tmp/file.txt.bz2');
277 Exceptions : If the file did not exist or was not readable
283 my ($file, $want_ref, $binary, $args) = @_;
285 bz_work_with_file($file,
'r', sub {
288 binmode($fh)
if $binary;
292 return ($want_ref) ? \$contents : $contents;
297 Arg [1] :
string $file
298 Arg [2] : boolean; $want_ref Indicates
if we want to
return a scalar reference
299 Arg [3] : boolean; $binary
300 Arg [4] : HashRef arguments to pass into IO compression layers
301 Description : Forces the contents of a file into a scalar. This is the
302 fastest way to get a file into memory in Perl. You can also
303 get a scalar reference back to avoid copying the file contents
304 in Scalar references. If the input file is binary then specify
306 Returntype : Scalar or reference of the file contents depending on arg 2
307 Example : my $contents = zip_slurp('/tmp/file.txt.zip');
308 Exceptions : If the file did not exist or was not readable
314 my ($file, $want_ref, $binary, $args) = @_;
316 zip_work_with_file($file,
'r', sub {
319 binmode($fh)
if $binary;
323 return ($want_ref) ? \$contents : $contents;
327 =head2 slurp_to_array
329 Arg [1] :
string $file
330 Arg [2] :
boolean $chomp
331 Description : Sends the contents of the given file into an ArrayRef
332 Returntype : ArrayRef
333 Example : my $contents_array = slurp_to_array(
'/tmp/file.txt');
334 Exceptions : If the file did not exist or was not readable
340 my ($file, $chomp) = @_;
342 work_with_file($file,
'r', sub {
344 $contents = fh_to_array($fh, $chomp);
350 =head2 gz_slurp_to_array
352 Arg [1] :
string $file
353 Arg [2] :
boolean $chomp
354 Arg [3] : HashRef arguments to pass into IO compression layers
355 Description : Sends the contents of the given gzipped file into an ArrayRef
356 Returntype : ArrayRef
357 Example : my $contents_array = gz_slurp_to_array(
'/tmp/file.txt.gz');
358 Exceptions : If the file did not exist or was not readable
# gz_slurp_to_array($file, $chomp, $args)
# Opens $file in 'r' mode via gz_work_with_file and stores the result of
# fh_to_array($fh, $chomp) in $contents.
# NOTE(review): this extract is missing lines of the sub (the declarations of
# $contents and $fh, the end of the callback, and the return) and does not
# show where $args is used -- confirm against the complete file.
363 sub gz_slurp_to_array {
364 my ($file, $chomp, $args) = @_;
366 gz_work_with_file($file,
'r', sub {
# $chomp is forwarded untouched; fh_to_array decides what to do with it.
368 $contents = fh_to_array($fh, $chomp);
374 =head2 bz_slurp_to_array
376 Arg [1] :
string $file
377 Arg [2] :
boolean $chomp
378 Arg [3] : HashRef arguments to pass into IO compression layers
379 Description : Sends the contents of the given bzipped file into an ArrayRef
380 Returntype : ArrayRef
381 Example : my $contents_array = bz_slurp_to_array(
'/tmp/file.txt.bz2');
382 Exceptions : If the file did not exist or was not readable
# bz_slurp_to_array($file, $chomp, $args)
# Opens $file in 'r' mode via bz_work_with_file and stores the result of
# fh_to_array($fh, $chomp) in $contents.
# NOTE(review): this extract is missing lines of the sub (the declarations of
# $contents and $fh, the end of the callback, and the return) and does not
# show where $args is used -- confirm against the complete file.
387 sub bz_slurp_to_array {
388 my ($file, $chomp, $args) = @_;
390 bz_work_with_file($file,
'r', sub {
# $chomp is forwarded untouched; fh_to_array decides what to do with it.
392 $contents = fh_to_array($fh, $chomp);
398 =head2 zip_slurp_to_array
400 Arg [1] :
string $file
401 Arg [2] :
boolean $chomp
402 Arg [3] : HashRef arguments to pass into IO compression layers
403 Description : Sends the contents of the given zipped file into an ArrayRef
404 Returntype : ArrayRef
405 Example : my $contents_array = zip_slurp_to_array(
'/tmp/file.txt.zip');
406 Exceptions : If the file did not exist or was not readable
# zip_slurp_to_array($file, $chomp, $args)
# Opens $file in 'r' mode via zip_work_with_file and stores the result of
# fh_to_array($fh, $chomp) in $contents.
# NOTE(review): this extract is missing lines of the sub (the declarations of
# $contents and $fh, the end of the callback, and the return) and does not
# show where $args is used -- confirm against the complete file.
411 sub zip_slurp_to_array {
412 my ($file, $chomp, $args) = @_;
414 zip_work_with_file($file,
'r', sub {
# $chomp is forwarded untouched; fh_to_array decides what to do with it.
416 $contents = fh_to_array($fh, $chomp);
424 Arg [1] : Glob/IO::Handle $fh
425 Arg [2] :
boolean $chomp
426 Description : Sends the contents of the given filehandle into an ArrayRef.
427 Will perform chomp on each line
if specified. If you require
428 any more advanced line based processing then see
430 Returntype : ArrayRef
431 Example : my $contents_array = fh_to_array($fh);
438 my ($fh, $chomp) = @_;
440 return process_to_array($fh, sub {
446 my @contents = <$fh>;
450 =head2 process_to_array
452 Arg [1] : Glob/IO::Handle $fh
453 Arg [2] : CodeRef $callback
454 Description : Sends the contents of the given file handle into an ArrayRef
455 via the processing callback. Assumes line based input.
456 Returntype : ArrayRef
457 Example : my $array = process_to_array($fh, sub { return "INPUT: $_[0]"; });
458 Exceptions : If the fh did not exist or
if a callback was not given.
# process_to_array($fh, $callback)
# Passes $fh to iterate_lines and, for every line, pushes the value(s)
# returned by $callback->($line) onto @contents.
# $fh and $callback are first checked with assert_file_handle and assert_ref
# (helpers defined outside this extract).
# NOTE(review): the declarations of @contents and $line, the end of the inner
# callback and the return statement are missing from this extract -- confirm
# against the complete file.
463 sub process_to_array {
464 my ($fh, $callback) = @_;
465 assert_file_handle($fh,
'FileHandle');
466 assert_ref($callback,
'CODE',
'callback');
468 iterate_lines($fh, sub {
# push evaluates the callback in list context, so one input line may add
# zero, one or several elements to @contents.
470 push(@contents, $callback->($line));
478 Arg [1] : Glob/IO::Handle $fh
479 Arg [2] : CodeRef $callback
480 Description : Iterates through each line from the given file handle and
481 hands them to the callback one by one
483 Example : iterate_lines($fh, sub { print
"INPUT: $_"; });
484 Exceptions : If the fh did not exist or
if a callback was not given.
490 my ($fh, $callback) = @_;
491 assert_file_handle($fh,
'FileHandle');
492 assert_ref($callback,
'CODE',
'callback');
493 while( my $line = <$fh> ) {
501 Arg [1] :
string $file
502 Arg [2] : CodeRef the callback which is used to iterate the lines in
504 Description : Iterates through each line from the given file and
505 hands them to the callback one by one
507 Example : iterate_file(
'/my/file', sub { print
"INPUT: $_"; });
508 Exceptions : If the file did not exist or
if a callback was not given.
515 my ($file, $callback) = @_;
516 work_with_file($file,
'r', sub {
518 iterate_lines($fh, $callback);
526 =head2 work_with_file
528 Arg [1] :
string $file
529 Arg [2] : string; $mode
530 Supports all modes specified by the C<open()>
function as well as those
531 supported by IO::File
532 Arg [3] : CodeRef the callback which is given the open file handle as
534 Description : Performs the nitty gritty of checking
if a file handle is open
535 and closing the resulting filehandle down.
537 Example : work_with_file(
'/tmp/out.txt',
'w', sub {
542 Exceptions : If we could not work with the file due to permissions
548 my ($file, $mode, $callback) = @_;
549 throw "We need a file name to open" if ! $file;
550 throw "We need a mode to open the requested file with" if ! $mode;
551 assert_ref($callback,
'CODE',
'callback');
552 my $fh = IO::File->new($file, $mode) or
553 throw "Cannot open '${file}' in mode '${mode}': $!";
555 close($fh) or
throw "Cannot close FH from ${file}: $!";
559 =head2 gz_work_with_file
561 Arg [1] :
string $file
562 Arg [2] : string; $mode
563 Supports modes like C<r>, C<w>, C<< > >> and C<< < >>
564 Arg [3] : CodeRef the callback which is given the open file handle as
566 Arg [4] : HashRef used to pass options into the IO
567 compression/uncompression modules
568 Description : Performs the nitty gritty of checking
if a file handle is open
569 and closing the resulting filehandle down.
571 Example : gz_work_with_file(
'/tmp/out.txt.gz',
'w', sub {
576 Exceptions : If we could not work with the file due to permissions
# gz_work_with_file($file, $mode, $callback, $args)
#
# Opens $file through the gzip IO layers, passes the open handle to $callback
# as its only argument and closes the handle afterwards.
#
#   $file     : path of the file to open (required)
#   $mode     : 'w' or anything ending in '>' opens for writing (a mode ending
#               in '>>' additionally sets the Append option); 'r' or '<' opens
#               for reading
#   $callback : CodeRef invoked with the open handle
#   $args     : optional HashRef of options forwarded to the IO::Compress /
#               IO::Uncompress constructor
#
# Throws if gzip support is flagged as unavailable, if $file or $mode is
# false, if the mode is not recognised, or if the handle cannot be opened or
# closed. Returns nothing.
sub gz_work_with_file {
  my ($file, $mode, $callback, $args) = @_;
  throw "IO::Compress was not available" if ! $GZIP_OK;
  throw "We need a file name to open" if ! $file;
  throw "We need a mode to open the requested file with" if ! $mode;
  assert_ref($callback, 'CODE', 'callback');

  # Work on a private copy so that setting Append below never leaks into the
  # hash owned by the caller (which may be reused for a later read).
  my %options = $args ? %{$args} : ();

  my $fh;
  {
    # The package error variables are only mentioned once each.
    no warnings qw/once/;
    if($mode =~ />$/ || $mode eq 'w') {
      $options{Append} = 1 if $mode =~ />>$/;
      $fh = IO::Compress::Gzip->new($file, %options) or
        throw "Cannot open '$file' for writing: $IO::Compress::Gzip::GzipError";
    }
    elsif($mode eq '<' || $mode eq 'r') {
      # This branch reads, so the message says so (it used to say "writing").
      $fh = IO::Uncompress::Gunzip->new($file, %options) or
        throw "Cannot open '$file' for reading: $IO::Uncompress::Gunzip::GunzipError";
    }
    else {
      throw "Could not decipher a mode from '$mode'";
    }
  }

  $callback->($fh);
  close($fh) or throw "Cannot close FH from ${file}: $!";
  return;
}
608 =head2 bz_work_with_file
610 Arg [1] :
string $file
611 Arg [2] : string; $mode
612 Supports modes like C<r>, C<w>, C<< > >> and C<< < >>
613 Arg [3] : CodeRef the callback which is given the open file handle as
615 Arg [4] : HashRef used to pass options into the IO
616 compression/uncompression modules
617 Description : Performs the nitty gritty of checking
if a file handle is open
618 and closing the resulting filehandle down.
620 Example : bz_work_with_file(
'/tmp/out.txt.bz2',
'w', sub {
625 Exceptions : If we could not work with the file due to permissions
# bz_work_with_file($file, $mode, $callback, $args)
#
# Opens $file through the bzip2 IO layers, passes the open handle to $callback
# as its only argument and closes the handle afterwards.
#
#   $file     : path of the file to open (required)
#   $mode     : 'w' or anything ending in '>' opens for writing (a mode ending
#               in '>>' additionally sets the Append option); 'r' or '<' opens
#               for reading
#   $callback : CodeRef invoked with the open handle
#   $args     : optional HashRef of options forwarded to the IO::Compress /
#               IO::Uncompress constructor
#
# Throws if bzip2 support is flagged as unavailable, if $file or $mode is
# false, if the mode is not recognised, or if the handle cannot be opened or
# closed. Returns nothing.
sub bz_work_with_file {
  my ($file, $mode, $callback, $args) = @_;
  throw "IO::Compress was not available" if ! $BZIP2_OK;
  throw "We need a file name to open" if ! $file;
  throw "We need a mode to open the requested file with" if ! $mode;
  assert_ref($callback, 'CODE', 'callback');

  # Work on a private copy so that setting Append below never leaks into the
  # hash owned by the caller (which may be reused for a later read).
  my %options = $args ? %{$args} : ();

  my $fh;
  {
    # The package error variables are only mentioned once each.
    no warnings qw/once/;
    if($mode =~ />$/ || $mode eq 'w') {
      $options{Append} = 1 if $mode =~ />>$/;
      $fh = IO::Compress::Bzip2->new($file, %options) or
        throw "Cannot open '$file' for writing: $IO::Compress::Bzip2::Bzip2Error";
    }
    elsif($mode eq '<' || $mode eq 'r') {
      # This branch reads, so the message says so (it used to say "writing").
      $fh = IO::Uncompress::Bunzip2->new($file, %options) or
        throw "Cannot open '$file' for reading: $IO::Uncompress::Bunzip2::Bunzip2Error";
    }
    else {
      throw "Could not decipher a mode from '$mode'";
    }
  }

  $callback->($fh);
  close($fh) or throw "Cannot close FH from ${file}: $!";
  return;
}
657 =head2 zip_work_with_file
659 Arg [1] :
string $file
660 Arg [2] : string; $mode
661 Supports modes like C<r>, C<w>, C<< > >> and C<< < >>
662 Arg [3] : CodeRef the callback which is given the open file handle as
664 Arg [4] : HashRef used to pass options into the IO
665 compression/uncompression modules
666 Description : Performs the nitty gritty of checking
if a file handle is open
667 and closing the resulting filehandle down.
669 Example : zip_work_with_file(
'/tmp/out.txt.zip',
'w', sub {
674 Exceptions : If we could not work with the file due to permissions
# zip_work_with_file($file, $mode, $callback, $args)
#
# Opens $file through the zip IO layers, passes the open handle to $callback
# as its only argument and closes the handle afterwards.
#
#   $file     : path of the file to open (required)
#   $mode     : 'w' or anything ending in '>' opens for writing (a mode ending
#               in '>>' additionally sets the Append option); 'r' or '<' opens
#               for reading
#   $callback : CodeRef invoked with the open handle
#   $args     : optional HashRef of options forwarded to the IO::Compress /
#               IO::Uncompress constructor
#
# Throws if zip support is flagged as unavailable, if $file or $mode is false,
# if the mode is not recognised, or if the handle cannot be opened or closed.
# Returns nothing.
sub zip_work_with_file {
  my ($file, $mode, $callback, $args) = @_;
  throw "IO::Compress was not available" if ! $ZIP_OK;
  throw "We need a file name to open" if ! $file;
  throw "We need a mode to open the requested file with" if ! $mode;
  assert_ref($callback, 'CODE', 'callback');

  # Work on a private copy so that setting Append below never leaks into the
  # hash owned by the caller (which may be reused for a later read).
  my %options = $args ? %{$args} : ();

  my $fh;
  {
    # The package error variables are only mentioned once each.
    no warnings qw/once/;
    if($mode =~ />$/ || $mode eq 'w') {
      $options{Append} = 1 if $mode =~ />>$/;
      $fh = IO::Compress::Zip->new($file, %options) or
        throw "Cannot open '$file' for writing: $IO::Compress::Zip::ZipError";
    }
    elsif($mode eq '<' || $mode eq 'r') {
      # This branch reads, so the message says so (it used to say "writing").
      $fh = IO::Uncompress::Unzip->new($file, %options) or
        throw "Cannot open '$file' for reading: $IO::Uncompress::Unzip::UnzipError";
    }
    else {
      throw "Could not decipher a mode from '$mode'";
    }
  }

  $callback->($fh);
  close($fh) or throw "Cannot close FH from ${file}: $!";
  return;
}
708 Arg [1] : String; directory
709 Arg [2] : CodeRef; the callback which is given a file in the
710 directory as its only argument
711 Description : Return the lexicographically sorted content of a directory.
712 The callback specifies the criteria an entry in
713 the directory must satisfy in order to appear in the content.
714 Returntype : Arrayref; list with the filtered files/directory
715 Example : filter_dir(
'/tmp', sub {
718 # select perl scripts in the directory
719 return $file
if $file =~ /\.pl$/;
721 Exceptions : If the directory cannot be opened or its handle
728 my ($dir, $callback) = @_;
730 assert_ref($callback,
'CODE',
'callback');
732 opendir(my $dh, $dir) or
throw "Cannot open directory $dir";
733 my @files = sort grep { $callback->($_) } readdir($dh);
734 closedir($dh) or
throw "Cannot close directory $dir";
742 Arg [1] : FileHandle $src_fh
743 Arg [2] : FileHandle $trg_fh
744 Arg [3] :
int $buffer. Defaults to 8KB
745 Description : Moves data from the given source filehandle to the target one
746 using an 8KB buffer or a user-specified buffer size
748 Example : move_data($src_fh, $trg_fh, 16*1024); # copy in 16KB chunks
749 Exceptions : If inputs were not as expected
754 my ($src_fh, $trg_fh, $buffer_size) = @_;
755 assert_file_handle($src_fh,
'SourceFileHandle');
756 assert_file_handle($trg_fh,
'TargetFileHandle');
758 $buffer_size ||= 8192; #Default 8KB
761 my $read = sysread($src_fh, $buffer, $buffer_size);
762 if(! defined $read) {
763 throw "Error whilst reading from filehandle: $!";
768 my $written = syswrite($trg_fh, $buffer);
769 if(!defined $written) {
770 throw "Error whilst writing to filehandle: $!";