3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package Bio::EnsEMBL::DBSQL::DataFileAdaptor;
  Please email comments or questions to the public Ensembl
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.

  Questions may also be sent to the Ensembl help desk at
  <http://www.ensembl.org/Help/Contact>.
39 my $dfa = $dba->get_DataFileAdaptor();
41 my $files = $dfa->fetch_all();
43 my $logic_name_files = $dfa->fetch_all_by_logic_name(
'bam_alignments');
Provides a database wrapper to store the locations of files and to pull these
records back out. DataFile objects can only provide basic information, but they
can return an intended external database adaptor which can be used to
parse the information. This system assumes nothing about the file, just that
your parser can access it.

Files are supported over any protocol your parser supports, and locations can be
made absolute, built on the fly or versioned.
72 =head2 global_base_path
74 Arg[1] : String; base path
76 Description : Stores a global value to be used when building data file paths
# Class-level accessor for the shared base path used when building data file
# paths. A true argument replaces the stored value; a missing or false
# argument leaves it untouched. The stored value is returned either way.
sub global_base_path {
  my ($class, $base_path) = @_;
  if ($base_path) {
    $GLOBAL_BASE_PATH = $base_path;
  }
  return $GLOBAL_BASE_PATH;
}
91 Arg[1] : String; (optional) base path
92 Example : $dfa->get_base_path();
93 Description : If given the path it will
return that path;
if not it consults
94 $self->global_base_path()
for a value. As a last resort
95 it will look at the meta table
for an entry keyed by
96 B<data_file.base_path>
98 Exceptions : Thrown
if nothing is found after consulting all three locations
# Body of the base-path lookup documented in the POD above (the sub header is
# not visible in this excerpt). Three sources are tried in order and the first
# one that is defined is returned; "defined" means an empty string or 0 is
# accepted as a valid answer at every step.
103 my ($self, $path) = @_;
# 1. A path supplied by the caller always wins.
104 return $path
if defined $path;
# 2. Otherwise use whatever global_base_path() currently holds.
105 my $global_base_path = $self->global_base_path();
106 return $global_base_path
if defined $global_base_path;
# 3. Last resort: the 'data_file.base_path' key from the meta container.
# NOTE(review): the meaning of the second argument (1) to
# single_value_by_key() is not visible here - confirm against MetaContainer.
107 my $meta_base_path = $self->db()->get_MetaContainer()->single_value_by_key(
'data_file.base_path', 1);
108 return $meta_base_path
if defined $meta_base_path;
# Nothing was defined in any of the three places, so fail with guidance.
109 throw "No base path discovered. Either provide a path, set a global using global_base_path() or specify 'data_file.base_path' in meta";
112 =head2 DataFile_to_extensions
115 Example : my $exts = $dfa->DataFile_to_extensions($bam_df);
116 Description : Returns all expected extensions
for the given
DataFile type. The
117 first returned is the
default extension
118 Returntype : ArrayRef
119 Exceptions : Raised
if the given file type is not understood
# Maps the type of the given DataFile to the list of file extensions expected
# for it. Per the POD above, the first extension in each list is the default.
# NOTE(review): the lines that derive $type from $df and that select
# $extensions from the table below are not visible in this excerpt.
123 sub DataFile_to_extensions {
124 my ($self, $df) = @_;
# Lookup table: file type => extensions, default extension first.
127 BAM => [
'bam',
'bam.bai'],
128 BAMCOV => [
'bam',
'bam.bai',
'bam.bw'], # BAM coverage files
131 VCF => [
'vcf.gz',
'vcf.gz.tbi'],
# A type with no entry leaves $extensions false, which is reported as an error.
133 throw sprintf(q{No extensions found
for the type
'%s'}, $type )
if ! $extensions;
138 =head2 DataFile_to_adaptor
141 Arg[2] : (optional) base path
142 Arg[3] : (optional) file type
143 Example : my $bam = $dfa->DataFile_to_adaptor($bam_df);
  Description : Returns an adaptor instance which will access the given DataFile.
                An adaptor for a specific file type can be requested explicitly
                (third argument); this is useful with composite types, e.g. BAM
                coverage files can be returned as BAM or BIGWIG
  Returntype  : Scalar; the actual return depends upon the given file type and
                the requested file type
  Exceptions  : Raised if the given file type is not understood or if the
                requested file type is incompatible with the actual data file type.
# Builds the IO adaptor used to read the file behind $df. $base is handed to
# the DataFile when it resolves its path(s); $requested_type optionally picks
# which adaptor a composite BAMCOV file should be opened with.
# NOTE(review): the assignment of $type and the guard around the first throw
# are not visible in this excerpt.
155 sub DataFile_to_adaptor {
156 my ($self, $df, $base, $requested_type) = @_;
# A requested type must equal the file's own type, unless the file is a
# BAMCOV composite (which may be opened as more than one type).
159 throw sprintf(
"Request for a '%s' adaptor, but file is of type '%s'", $requested_type, $type)
160 if $type ne
'BAMCOV' and $type ne $requested_type;
# Simple types: one adaptor class per file type, opened on the resolved path.
# NOTE(review): the conditions attached to the BAM and VCF returns are not
# visible in this excerpt.
164 return Bio::EnsEMBL::IO::Adaptor::BAMAdaptor->new($df->path($base))
167 return Bio::EnsEMBL::IO::Adaptor::BigBedAdaptor->new($df->path($base))
168 if $type eq
'BIGBED';
170 return Bio::EnsEMBL::IO::Adaptor::BigWigAdaptor->new($df->path($base))
171 if $type eq
'BIGWIG';
173 return Bio::EnsEMBL::IO::Adaptor::VCFAdaptor->new($df->path($base))
176 # BAMCOV composite case
177 if ($type eq
'BAMCOV') {
# Requested as BAM (or as BAMCOV itself): open the main path with the BAM adaptor.
178 return Bio::EnsEMBL::IO::Adaptor::BAMAdaptor->new($df->path($base))
179 if $requested_type eq
'BAM' or $requested_type eq
'BAMCOV';
# Requested as BIGWIG: open the third path returned by get_all_paths().
# This lines up with 'bam.bw' being the third BAMCOV extension in
# DataFile_to_extensions - TODO confirm get_all_paths() follows that order.
181 return Bio::EnsEMBL::IO::Adaptor::BigWigAdaptor->new($df->get_all_paths($base)->[2])
182 if $requested_type eq
'BIGWIG';
# No branch matched: this file type / requested type pair has no handler.
185 throw sprintf(q{No
'%s' handler found
for the type
'%s'}, $requested_type, $type )
190 =head2 fetch_all_by_logic_name
192 Args [1] : String $logic_name
for the linked analysis
193 Example : my $dfs = $dfa->fetch_all_by_logic_name(
'bam_alignments');
194 Description : Returns all DataFile entries linked to the given analysis
197 Exceptions : Thrown
if logic name does not exist
# Resolves $logic_name to its analysis and returns whatever
# fetch_all_by_Analysis() yields for it. Throws when no analysis is
# registered under that logic name.
sub fetch_all_by_logic_name {
  my ($self, $logic_name) = @_;
  my $analysis_adaptor = $self->db()->get_AnalysisAdaptor();
  my $analysis = $analysis_adaptor->fetch_by_logic_name($logic_name);
  if (! $analysis) {
    throw("No analysis found for logic_name '${logic_name}'");
  }
  return $self->fetch_all_by_Analysis($analysis);
}
208 =head2 fetch_all_by_Analysis
211 Example : my $dfs = $dfa->fetch_all_by_Analysis($analysis);
212 Description : Returns all DataFile entries linked to the given analysis
# Fetches every DataFile linked to the given analysis. The argument is
# type-checked first; its dbID is then bound as the single query parameter
# for the 'df.analysis_id' constraint.
sub fetch_all_by_Analysis {
  my ($self, $analysis) = @_;
  assert_ref($analysis, 'Bio::EnsEMBL::Analysis', 'analysis');
  my $constraint = 'df.analysis_id =?';
  $self->bind_param_generic_fetch($analysis->dbID(), SQL_INTEGER);
  return $self->generic_fetch($constraint);
}
225 =head2 fetch_all_by_CoordSystem
228 Example : my $dfs = $dfa->fetch_all_by_CoordSystem($cs);
229 Description : Returns all DataFile entries linked to the given coordinate
230 system. Does B<not> support I<toplevel>
# Fetches every DataFile attached to the given coordinate system. The argument
# is type-checked first; its dbID is then bound as the single query parameter
# for the 'df.coord_system_id' constraint.
sub fetch_all_by_CoordSystem {
  my ($self, $cs) = @_;
  assert_ref($cs, 'Bio::EnsEMBL::CoordSystem', 'coord_system');
  my $constraint = 'df.coord_system_id =?';
  $self->bind_param_generic_fetch($cs->dbID(), SQL_INTEGER);
  return $self->generic_fetch($constraint);
}
# Fetches the single DataFile identified by its name and file type.
#
#   Arg[1]     : String $name; value matched against df.name
#   Arg[2]     : String $type; value matched against df.file_type
#   Returntype : The first matching record, or nothing (undef in scalar
#                context, empty list in list context) when there is no match
sub fetch_by_name_and_type {
  my ($self, $name, $type) = @_;
  # Bind order must follow placeholder order in the constraint: name, then type.
  for my $value ($name, $type) {
    $self->bind_param_generic_fetch($value, SQL_VARCHAR);
  }
  my $results = $self->generic_fetch('df.name =? and df.file_type =?');
  return $results->[0] if @{$results};
  # Explicit "not found" result; without it the sub would hand back the
  # leftover value of the failed condition above.
  return;
}
# Body of the shared fetch routine - presumably generic_fetch, which the
# fetch_* methods in this file call as $self->generic_fetch($constraint); the
# sub header is not visible in this excerpt. It selects data_file columns
# joined to coord_system, always restricted by cs.species_id, optionally
# narrowed by the caller's $constraint (appended with 'AND '), and unpacks each
# row into named fields inside the callback.
253 my ($self, $constraint) = @_;
257 select df.data_file_id, df.coord_system_id, df.analysis_id, df.name, df.version_lock, df.absolute, df.url, df.file_type
259 join coord_system cs
using (coord_system_id)
260 where cs.species_id =?
262 $sql .=
'AND '.$constraint
if $constraint;
# Collect any values queued through bind_param_generic_fetch(). When some
# exist, the queue slot on $self is reset (assigning an empty list to the hash
# element leaves it undef) so they are not reused by the next query.
264 my $params = $self->bind_param_generic_fetch();
265 if(defined $params) {
266 $self->{
'_bind_param_generic_fetch'} = ();
# The species id goes first so that it lines up with the 'cs.species_id =?'
# placeholder, which precedes any placeholders in the caller's constraint.
271 unshift(@{$params}, $self->db()->species_id());
# Adaptors used inside the callback to turn stored ids into objects.
273 my $csa = $self->db()->get_CoordSystemAdaptor();
274 my $aa = $self->db()->get_AnalysisAdaptor();
276 return $self->dbc()->sql_helper()->execute(-SQL => $sql, -PARAMS => $params, -CALLBACK => sub {
# Columns are unpacked in the same order as the select list.
278 my ($data_file_id, $coord_system_id, $analysis_id, $name, $version_lock, $absolute, $url, $file_type) = @{$row};
280 dbID => $data_file_id,
282 coord_system => $csa->fetch_by_dbID($coord_system_id),
283 analysis => $aa->fetch_by_dbID($analysis_id),
285 version_lock => $version_lock,
286 absolute => $absolute,
287 file_type => $file_type,
# url is only set when the column holds a true value.
289 $hash->{url} = $url
if $url;
# Body of the routine that inserts a DataFile into the data_file table (the
# sub header is not visible in this excerpt). The bind values below are listed
# in the same order as the column list of the INSERT statement, and the id
# generated by the insert is written back onto $df.
295 my ($self, $df) = @_;
297 assert_ref($df,
'Bio::EnsEMBL::DataFile',
'datafile');
# Objects already stored in this database are special-cased.
# NOTE(review): the body of this branch is not visible in this excerpt.
299 if ($df->is_stored($self->db())) {
# Both foreign keys are required; their dbIDs are dereferenced below.
303 throw 'Analysis is not defined for this data file' if ! defined $df->analysis();
304 throw 'Coord system is not defined for this data file' if ! defined $df->coord_system();
307 INSERT INTO data_file (coord_system_id, analysis_id, name, version_lock, absolute, url, file_type)
308 VALUES (?,?,?,?,?,?,?)
311 [$df->coord_system()->dbID(), SQL_INTEGER],
312 [$df->analysis()->dbID(), SQL_INTEGER],
313 [$df->name(), SQL_VARCHAR],
314 [$df->version_lock(), SQL_INTEGER],
315 [$df->absolute(), SQL_INTEGER],
316 [$df->url(), SQL_VARCHAR],
317 [$df->file_type(), SQL_VARCHAR],
# Run the insert; the callback copies the adaptor's last_insert_id() onto $df.
# $sth and $dbh are unpacked but not used in the visible lines.
319 $self->dbc()->sql_helper()->execute_update(-SQL => $sql, -PARAMS => $params, -CALLBACK => sub {
320 my ( $sth, $dbh ) = @_;
321 $df->dbID($self->last_insert_id());
# Body of the routine that writes an existing DataFile back to the data_file
# table (the sub header is not visible in this excerpt). Every column is
# rewritten for the row matching the object's dbID; the bind values below
# follow the order of the placeholders in the UPDATE statement, with the dbID
# for the WHERE clause last.
330 my ($self, $df) = @_;
332 assert_ref($df,
'Bio::EnsEMBL::DataFile',
'datafile');
# Only objects already stored in this database pass this check.
# NOTE(review): the body of this branch is not visible in this excerpt.
334 if (! $df->is_stored($self->db())) {
340 UPDATE data_file SET coord_system_id =?, analysis_id=?, name=?, version_lock=?, absolute=?, url=?, file_type=?
341 WHERE data_file_id =?
344 [$df->coord_system()->dbID(), SQL_INTEGER],
345 [$df->analysis()->dbID(), SQL_INTEGER],
346 [$df->name(), SQL_VARCHAR],
347 [$df->version_lock(), SQL_INTEGER],
348 [$df->absolute(), SQL_INTEGER],
349 [$df->url(), SQL_VARCHAR],
350 [$df->file_type(), SQL_VARCHAR],
351 [$df->dbID(), SQL_INTEGER],
# Execute the statement; no callback is needed because no id is generated.
353 $self->dbc()->sql_helper()->execute_update(-SQL => $sql, -PARAMS => $params);
358 my ($self, $df) = @_;
360 assert_ref($df,
'Bio::EnsEMBL::DataFile',
'datafile');
362 if (! $df->is_stored($self->db())) {
363 throw "Cannot delete the data file if it has not already been stored in this database";
366 $self->dbc()->sql_helper()->execute_update(
367 -SQL =>
'DELETE from data_file where data_file_id =?',
368 -PARAMS => [[$df->dbID(), SQL_INTEGER]],