ensembl-hive  2.7.0
CoordSystemAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 
35 =head1 SYNOPSIS
36 
38 
40  -host => 'ensembldb.ensembl.org',
41  -user => 'anonymous'
42  );
43 
44  $csa = Bio::EnsEMBL::Registry->get_adaptor( "human", "core",
45  "coordsystem" );
46 
47  #
48  # Get all coord systems in the database:
49  #
50  foreach my $cs ( @{ $csa->fetch_all() } ) {
51  print $cs->name, ' ', $cs->version, "\n";
52  }
53 
54  #
55  # Fetching by name:
56  #
57 
58  # use the default version of coord_system 'chromosome' (e.g. NCBI33):
59  $cs = $csa->fetch_by_name('chromosome');
60 
61  # get an explicit version of coord_system 'chromosome':
62  $cs = $csa->fetch_by_name( 'chromsome', 'NCBI34' );
63 
64  # get all coord_systems of name 'chromosome':
65  foreach $cs ( @{ $csa->fetch_all_by_name('chromosome') } ) {
66  print $cs->name, ' ', $cs->version, "\n";
67  }
68 
69  #
70  # Fetching by rank:
71  #
72  $cs = $csa->fetch_by_rank(2);
73 
74  #
75  # Fetching the pseudo coord system 'toplevel'
76  #
77 
78  # Get the default top_level coord system:
79  $cs = $csa->fetch_top_level();
80 
81  # can also use an alias in fetch_by_name:
82  $cs = $csa->fetch_by_name('toplevel');
83 
84  # can also request toplevel using rank=0
85  $cs = $csa->fetch_by_rank(0);
86 
87  #
88  # Fetching by sequence level:
89  #
90 
91  # Get the coord system which is used to store sequence:
92  $cs = $csa->fetch_sequence_level();
93 
94  # can also use an alias in fetch_by_name:
95  $cs = $csa->fetch_by_name('seqlevel');
96 
97  #
98  # Fetching by id
99  #
100  $cs = $csa->fetch_by_dbID(1);
101 
102 
103 =head1 DESCRIPTION
104 
105 This adaptor allows the querying of information from the coordinate
106 system adaptor.
107 
108 Note that many coordinate systems do not have a concept of a version
109 for the entire coordinate system (though they may have a per-sequence
110 version). The 'chromosome' coordinate system usually has a version
111 (i.e. the assembly version) but the clonal coordinate system does not
112 (despite having individual sequence versions). In the case where a
113 coordinate system does not have a version an empty string ('') is used
114 instead.
115 
116 =head1 METHODS
117 
118 =cut
119 
120 package Bio::EnsEMBL::DBSQL::CoordSystemAdaptor;
121 
122 use strict;
123 use warnings;
124 
125 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
126 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
127 use Bio::EnsEMBL::CoordSystem;
128 
129 use vars qw(@ISA);
130 
131 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
132 
133 
134 =head2 new
135 
136  Arg [1] : See BaseAdaptor for arguments (none specific to this
137  subclass)
138  Example : $cs = $db->get_CoordSystemAdaptor(); #better than new()
139  Description: Creates a new CoordSystem adaptor and caches the contents
140  of the coord_system table in memory.
142  Exceptions : none
143  Caller :
144  Status : Stable
145 
146 =cut
147 
148 sub new {
149  my ( $proto, @args ) = @_;
150 
151  my $class = ref($proto) || $proto;
152  my $self = $class->SUPER::new(@args);
153 
154  #
155  # Cache the entire contents of the coord_system table cross-referenced
156  # by dbID and name.
157  #
158 
159  # keyed on name, list of coord_system value
160  $self->{'_name_cache'} = {};
161 
162  # keyed on id, coord_system value
163  $self->{'_dbID_cache'} = {};
164 
165  # keyed on rank
166  $self->{'_rank_cache'} = {};
167 
168  # keyed on id, 1/undef values
169  $self->{'_is_sequence_level'} = {};
170  $self->{'_is_default_version'} = {};
171 
172  #cache to store the seq_region_mapping information
173  #from internal->external
174  $self->{'_internal_seq_region_mapping'} = {};
175  #from external->internal
176  $self->{'_external_seq_region_mapping'} = {};
177 
178  my $sth = $self->prepare(
179  'SELECT `coord_system_id`, `name`, `rank`, `version`, `attrib` '
180  . 'FROM coord_system '
181  . 'WHERE species_id = ?' );
182 
183  $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
184  $sth->execute();
185 
186  my ( $dbID, $name, $rank, $version, $attrib );
187  $sth->bind_columns( \( $dbID, $name, $rank, $version, $attrib ) );
188 
189  while ( $sth->fetch() ) {
190  my $seq_lvl = 0;
191  my $default = 0;
192 
193  if ( defined($attrib) ) {
194  foreach my $attrib ( split( ',', $attrib ) ) {
195  $self->{"_is_$attrib"}->{$dbID} = 1;
196  if ( $attrib eq 'sequence_level' ) {
197  $seq_lvl = 1;
198  } elsif ( $attrib eq 'default_version' ) {
199  $default = 1;
200  }
201  }
202  }
203 
204  my $cs =
205  Bio::EnsEMBL::CoordSystem->new( -DBID => $dbID,
206  -ADAPTOR => $self,
207  -NAME => $name,
208  -VERSION => $version,
209  -RANK => $rank,
210  -SEQUENCE_LEVEL => $seq_lvl,
211  -DEFAULT => $default );
212 
213  $self->{'_dbID_cache'}->{$dbID} = $cs;
214 
215  $self->{'_name_cache'}->{ lc($name) } ||= [];
216  $self->{'_rank_cache'}->{$rank} = $cs;
217 
218  push @{ $self->{'_name_cache'}->{ lc($name) } }, $cs;
219 
220  } ## end while ( $sth->fetch() )
221  $sth->finish();
222 
223  $self->_cache_mapping_paths();
224 
225  $self->_cache_seq_region_mapping();
226 
227  return $self;
228 } ## end sub new
229 
230 sub _cache_seq_region_mapping {
231  #
232  # This cache will load the information from the seq_region_table, if
233  # any, to allow mapping between internal and external seq_region_id.
234  #
235 
236  my ($self) = @_;
237 
238  # For a given core database, will return the schema_build information.
239  my $schema_build = $self->db->_get_schema_build();
240 
241  # Prepare the query to get relation for the current database being
242  # used.
243  my $sql = qq(
244  SELECT s.internal_seq_region_id,
245  s.external_seq_region_id
246  FROM seq_region_mapping s,
247  mapping_set ms,
248  seq_region sr,
249  coord_system cs
250  WHERE ms.mapping_set_id = s.mapping_set_id
251  AND ms.internal_schema_build = ?
252  AND s.internal_seq_region_id = sr.seq_region_id
253  AND sr.coord_system_id = cs.coord_system_id
254  AND cs.species_id = ?);
255 
256  my $sth = $self->prepare($sql);
257 
258  $sth->bind_param( 1, $schema_build, SQL_VARCHAR );
259  $sth->bind_param( 2, $self->species_id(), SQL_INTEGER );
260 
261  $sth->execute();
262 
263  # Load the cache:
264  foreach my $row ( @{ $sth->fetchall_arrayref() } ) {
265  # internal->external
266  $self->{'_internal_seq_region_mapping'}->{ $row->[0] } = $row->[1];
267  # external->internal
268  $self->{'_external_seq_region_mapping'}->{ $row->[1] } = $row->[0];
269  }
270 
271  $sth->finish();
272 
273 } ## end sub _cache_seq_region_mapping
274 
275 
276 sub _cache_mapping_paths {
277  # Retrieve a list of available mappings from the meta table. This
278  # may eventually be moved a table of its own if this proves too
279  # cumbersome.
280 
281  my ($self) = @_;
282 
283  my %mapping_paths;
284  my $mc = $self->db()->get_MetaContainer();
285 
286 MAP_PATH:
287  foreach
288  my $map_path ( @{ $mc->list_value_by_key('assembly.mapping') } )
289  {
290  my @cs_strings = split( /[|#]/, $map_path );
291 
292  if ( scalar(@cs_strings) < 2 ) {
293  warning( "Incorrectly formatted assembly.mapping value in meta "
294  . "table: $map_path" );
295  next MAP_PATH;
296  }
297 
298  my @coord_systems;
299  foreach my $cs_string (@cs_strings) {
300  my ( $name, $version ) = split( /:/, $cs_string );
301 
302  my $cs = $self->fetch_by_name( $name, $version );
303 
304  if ( !defined($cs) ) {
305  warning( "Unknown coordinate system specified in meta table "
306  . " assembly.mapping:\n $name:$version" );
307  next MAP_PATH;
308  }
309 
310  push( @coord_systems, $cs );
311  }
312 
313  # If the delimiter is a '#' we want a special case, multiple parts
314  # of the same component map to the same assembly part. As this
315  # looks like the "long" mapping, we just make the path a bit longer
316  # :-)
317 
318  if ( index( $map_path, '#' ) != -1 && scalar(@coord_systems) == 2 )
319  {
320  splice( @coord_systems, 1, 0, (undef) );
321  }
322 
323  my $cs1 = $coord_systems[0];
324  my $cs2 = $coord_systems[$#coord_systems];
325 
326  my $key1 = $cs1->name();
327  $key1 .= ':' . $cs1->version() if $cs1->version();
328  my $key2 = $cs2->name();
329  $key2 .= ':' . $cs2->version() if $cs2->version();
330 
331  if ( exists( $mapping_paths{"$key1|$key2"} ) ) {
332  warning( "Meta table specifies multiple mapping paths between "
333  . "coord systems $key1 and $key2.\n"
334  . "Choosing shorter path arbitrarily." );
335 
336  if ( scalar( @{ $mapping_paths{"$key1|$key2"} } ) <
337  scalar(@coord_systems) )
338  {
339  next MAP_PATH;
340  }
341  }
342 
343  $mapping_paths{"$key1|$key2"} = \@coord_systems;
344  } ## end foreach my $map_path ( @{ $mc...
345 
346  # Create the pseudo coord system 'toplevel' and cache it so that only
347  # one of these is created for each database.
348 
349  my $toplevel =
350  Bio::EnsEMBL::CoordSystem->new( -TOP_LEVEL => 1,
351  -NAME => 'toplevel',
352  -ADAPTOR => $self );
353 
354  $self->{'_top_level'} = $toplevel;
355  $self->{'_mapping_paths'} = \%mapping_paths;
356 
357  return 1;
358 } ## end sub _cache_mapping_paths
359 
360 =head2 fetch_all
361 
362  Arg [1] : none
363  Example : foreach my $cs (@{$csa->fetch_all()}) {
364  print $cs->name(), ' ', $cs->version(), "\n";
365  }
366  Description: Retrieves every coordinate system defined in the DB.
367  These will be returned in ascending order of rank. I.e.
368  The highest coordinate system with rank=1 would be first in the
369  array.
370  Returntype : listref of Bio::EnsEMBL::CoordSystems
371  Exceptions : none
372  Caller : general
373  Status : Stable
374 
375 =cut
376 
377 sub fetch_all {
378  my $self = shift;
379 
380  my @coord_systems;
381 
382  #order the array by rank in ascending order
383  foreach my $rank (sort {$a <=> $b} keys %{$self->{'_rank_cache'}}) {
384  push @coord_systems, $self->{'_rank_cache'}->{$rank};
385  }
386 
387  return \@coord_systems;
388 }
389 
390 
391 
392 =head2 fetch_by_rank
393 
394  Arg [1] : int $rank
395  Example : my $cs = $coord_sys_adaptor->fetch_by_rank(1);
396  Description: Retrieves a CoordinateSystem via its rank. 0 is a special
397  rank reserved for the pseudo coordinate system 'toplevel'.
398  undef is returned if no coordinate system of the specified rank
399  exists.
400  Returntype : Bio::EnsEMBL::CoordSystem
401  Exceptions : none
402  Caller : general
403  Status : Stable
404 
405 =cut
406 
407 sub fetch_by_rank {
408  my $self = shift;
409  my $rank = shift;
410 
411  throw("Rank argument must be defined.") if(!defined($rank));
412  throw("Rank argument must be a non-negative integer.") if($rank !~ /^\d+$/);
413 
414  if($rank == 0) {
415  return $self->fetch_top_level();
416  }
417 
418  return $self->{'_rank_cache'}->{$rank};
419 }
420 
421 
422 =head2 fetch_by_name
423 
424  Arg [1] : string $name
425  The name of the coordinate system to retrieve. Alternatively
426  this may be an alias for a real coordinate system. Valid
427  aliases are 'toplevel' and 'seqlevel'.
428  Arg [2] : string $version (optional)
429  The version of the coordinate system to retrieve. If not
430  specified the default version will be used.
431  Example : $coord_sys = $csa->fetch_by_name('clone');
432  $coord_sys = $csa->fetch_by_name('chromosome', 'NCBI33');
433  # toplevel is an pseudo coord system representing the highest
434  # coord system in a given region
435  # such as the chromosome coordinate system
436  $coord_sys = $csa->fetch_by_name('toplevel');
437  #seqlevel is an alias for the sequence level coordinate system
438  #such as the clone or contig coordinate system
439  $coord_sys = $csa->fetch_by_name('seqlevel');
440  Description: Retrieves a coordinate system by its name
441  Returntype : Bio::EnsEMBL::CoordSystem
442  Exceptions : throw if no name argument provided
443  warning if no version provided and default does not exist
444  Caller : general
445  Status : Stable
446 
447 =cut
448 
449 sub fetch_by_name {
450  my $self = shift;
451  my $name = lc(shift); #case insensitve matching
452  my $version = shift;
453 
454  throw('Name argument is required.') if(!$name);
455 
456  $version = lc($version) if($version);
457 
458 
459  if($name eq 'seqlevel') {
460  return $self->fetch_sequence_level();
461  } elsif($name eq 'toplevel') {
462  return $self->fetch_top_level($version);
463  }
464 
465  if(!exists($self->{'_name_cache'}->{$name})) {
466  if($name =~ /top/) {
467  warning("Did you mean 'toplevel' coord system instead of '$name'?");
468  } elsif($name =~ /seq/) {
469  warning("Did you mean 'seqlevel' coord system instead of '$name'?");
470  }
471  return undef;
472  }
473 
474  my @coord_systems = @{$self->{'_name_cache'}->{$name}};
475 
476  foreach my $cs (@coord_systems) {
477  if($version) {
478  return $cs if(lc($cs->version()) eq $version);
479  } elsif($self->{'_is_default_version'}->{$cs->dbID()}) {
480  return $cs;
481  }
482  }
483 
484  if($version) {
485  #the specific version we were looking for was not found
486  return undef;
487  }
488 
489  #didn't find a default, just take first one
490  my $cs = shift @coord_systems;
491  my $v = $cs->version();
492  warning("No default version for coord_system [$name] exists. " .
493  "Using version [$v] arbitrarily");
494 
495  return $cs;
496 }
497 
498 
499 =head2 fetch_all_by_name
500 
501  Arg [1] : string $name
502  The name of the coordinate system to retrieve. This can be
503  the name of an actual coordinate system or an alias for a
504  coordinate system. Valid aliases are 'toplevel' and 'seqlevel'.
505  Example : foreach my $cs (@{$csa->fetch_all_by_name('chromosome')}){
506  print $cs->name(), ' ', $cs->version();
507  }
508  Description: Retrieves all coordinate systems of a particular name
509  Returntype : listref of Bio::EnsEMBL::CoordSystem objects
510  Exceptions : throw if no name argument provided
511  Caller : general
512  Status : Stable
513 
514 =cut
515 
516 sub fetch_all_by_name {
517  my $self = shift;
518  my $name = lc(shift); #case insensitive matching
519 
520  throw('Name argument is required') if(!$name);
521 
522  if($name eq 'seqlevel') {
523  return [$self->fetch_sequence_level()];
524  } elsif($name eq 'toplevel') {
525  return [$self->fetch_top_level()];
526  }
527 
528  return $self->{'_name_cache'}->{$name} || [];
529 }
530 
531 =head2 fetch_all_by_version
532 
533  Arg [1] : string $version
534  The version of the coordinate systems to retrieve.
535  Example : foreach my $cs (@{$csa->fetch_all_by_version('GRCh37')}){
536  print $cs->name(), ' ', $cs->version();
537  }
538  Description: Retrieves all coordinate systems of a particular version
539  Returntype : ArrayRef of Bio::EnsEMBL::CoordSystem objects
540  Exceptions : throw if no name argument provided
541  Caller : general
542  Status : Stable
543 
544 =cut
545 
546 sub fetch_all_by_version {
547  my ($self, $version) = @_;
548  throw "Version argument is required" if ! $version;
549  my @coord_systems;
550 
551  foreach my $rank (sort {$a <=> $b} keys %{$self->{'_rank_cache'}}) {
552  if ($self->{'_rank_cache'}->{$rank}->version()) {
553  if ($self->{'_rank_cache'}->{$rank}->version() eq $version) {
554  push @coord_systems, $self->{'_rank_cache'}->{$rank};
555  }
556  } else {
557  if (!$version) {
558  push @coord_systems, $self->{'_rank_cache'}->{$rank};
559  }
560  }
561  }
562  return \@coord_systems;
563 }
564 
565 =head2 fetch_by_dbID
566 
567  Arg [1] : int dbID
568  Example : $cs = $csa->fetch_by_dbID(4);
569  Description: Retrieves a coord_system via its internal
570  identifier, or undef if no coordinate system with the provided
571  id exists.
572  Returntype : Bio::EnsEMBL::CoordSystem or undef
573  Exceptions : thrown if no coord_system exists for specified dbID
574  Caller : general
575  Status : Stable
576 
577 =cut
578 
579 sub fetch_by_dbID {
580  my $self = shift;
581  my $dbID = shift;
582 
583  throw('dbID argument is required') if(!$dbID);
584 
585  my $cs = $self->{'_dbID_cache'}->{$dbID};
586 
587  return undef if(!$cs);
588 
589  return $cs;
590 }
591 
592 
593 
594 =head2 fetch_top_level
595 
596  Arg [1] : none
597  Example : $cs = $csa->fetch_top_level();
598  Description: Retrieves the toplevel pseudo coordinate system.
599  Returntype : Bio::EnsEMBL::CoordSystem object
600  Exceptions : none
601  Caller : general
602  Status : Stable
603 
604 =cut
605 
606 sub fetch_top_level {
607  my $self = shift;
608 
609  return $self->{'_top_level'};
610 }
611 
612 
613 =head2 fetch_sequence_level
614 
615  Arg [1] : none
616  Example : ($id, $name, $version) = $csa->fetch_sequence_level();
617  Description: Retrieves the coordinate system at which sequence
618  is stored at.
619  Returntype : Bio::EnsEMBL::CoordSystem
620  Exceptions : throw if no sequence_level coord system exists at all
621  throw if multiple sequence_level coord systems exists
622  Caller : general
623  Status : Stable
624 
625 =cut
626 
627 sub fetch_sequence_level {
628  my $self = shift;
629 
630  my @dbIDs = keys %{$self->{'_is_sequence_level'}};
631 
632  throw('No sequence_level coord_system is defined') if(!@dbIDs);
633 
634  if(@dbIDs > 1) {
635  throw('Multiple sequence_level coord_systems are defined.' .
636  'Only one is currently supported');
637  }
638 
639  return $self->{'_dbID_cache'}->{$dbIDs[0]};
640 }
641 =head2 get_default_version
642 
643  Arg [1] : none
644  Example : $version = $csa->get_default_version();
645  Description: Retrieves the default version of the assembly
646  Returntype : String
647  Exceptions : throw if no default version is defined
648  Caller : general
649  Status : Stable
650 
651 =cut
652 
653 sub get_default_version {
654  my $self = shift;
655 
656  my $version;
657  foreach my $dbID (keys %{$self->{'_is_default_version'}}) {
658  if ($self->{'_dbID_cache'}->{$dbID}->version) {
659  $version = $self->{'_dbID_cache'}->{$dbID}->version;
660  last;
661  }
662  }
663 
664  return $version;
665 }
666 
667 
668 
669 =head2 get_all_versions
670 
671  Arg [1] : none
672  Example : @versions = $csa->get_all_versions();
673  Description: Retrieves all the available versions of assemblies
674  Returntype : Listref of versions (strings)
675  Exceptions : throw if no version is defined
676  Caller : general
677  Status : Stable
678 
679 =cut
680 
681 sub get_all_versions {
682  my $self = shift;
683 
684  my %hash_versions;
685  my @versions;
686  my $version;
687  foreach my $dbID (sort {$a <=> $b} keys %{$self->{'_rank_cache'}}) {
688  if ($self->{'_rank_cache'}->{$dbID}->version) {
689  $version = $self->{'_rank_cache'}->{$dbID}->version;
690  if (!$hash_versions{$version}) {
691  $hash_versions{$version} = 1;
692  push @versions, $version;
693  }
694  }
695  }
696 
697  throw('No versions found') if(!scalar(@versions));
698 
699  return \@versions;
700 }
701 
702 
703 =head2 get_mapping_path
704 
705  Arg [1] : Bio::EnsEMBL::CoordSystem $cs1
706  Arg [2] : Bio::EnsEMBL::CoordSystem $cs2
707  Example : foreach my $cs @{$csa->get_mapping_path($cs1,$cs2);
708  Description: Given two coordinate systems this will return a mapping path
709  between them if one has been defined. Allowed Mapping paths are
710  explicitly defined in the meta table. The following is an
711  example:
712 
713  mysql> select * from meta where meta_key = 'assembly.mapping';
714  +---------+------------------+--------------------------------------+
715  | meta_id | meta_key | meta_value |
716  +---------+------------------+--------------------------------------+
717  | 20 | assembly.mapping | chromosome:NCBI34|contig |
718  | 21 | assembly.mapping | clone|contig |
719  | 22 | assembly.mapping | supercontig|contig |
720  | 23 | assembly.mapping | chromosome:NCBI34|contig|clone |
721  | 24 | assembly.mapping | chromosome:NCBI34|contig|supercontig |
722  | 25 | assembly.mapping | supercontig|contig|clone |
723  +---------+------------------+--------------------------------------+
724 
725  For a one-step mapping path to be valid there needs to be
726  a relationship between the two coordinate systems defined in
727  the assembly table. Two step mapping paths work by building
728  on the one-step mapping paths which are already defined.
729 
730  The first coordinate system in a one step mapping path must
731  be the assembled coordinate system and the second must be
732  the component.
733 
734  Example of use:
735  my $cs1 = $cs_adaptor->fetch_by_name('contig');
736  my $cs2 = $cs_adaptor->fetch_by_name('chromosome');
737 
738  my @path = @{$cs_adaptor->get_mapping_path($cs1,$cs2)};
739 
740  if(!@path) {
741  print "No mapping path.";
742  }
743  elsif(@path == 2) {
744  print "2 step mapping path.";
745  print "Assembled = " . $path[0]->name() . "\n";
746  print "Component = " . $path[1]->name() . "\n";
747  } else {
748  print "Multi step mapping path\n";
749  }
750 
751  Returntype : reference to a list of Bio::EnsEMBL::CoordSystem objects
752 
753  Exceptions : none
754  Caller : general
755  Status : Stable
756 
757 =cut
758 
759 sub get_mapping_path {
760  my $self = shift;
761  my $cs1 = shift;
762  my $cs2 = shift;
763 
764  if(!ref($cs1) || !ref($cs2) ||
765  !$cs1->isa('Bio::EnsEMBL::CoordSystem') ||
766  !$cs2->isa('Bio::EnsEMBL::CoordSystem')) {
767  throw('Two Bio::EnsEMBL::CoordSystem arguments expected.');
768  }
769 
770  my $key1 = $cs1->name();
771  $key1 .= ':' . $cs1->version() if ($cs1->version());
772  my $key2 = $cs2->name();
773  $key2 .= ':' . $cs2->version() if ($cs2->version());
774 
775  my $path = $self->{'_mapping_paths'}->{"$key1|$key2"};
776 
777  return $path if($path);
778 
779  $path = $self->{'_mapping_paths'}->{"$key2|$key1"};
780 
781  if(!$path) {
782  # No path was explicitly defined, but we might be able to guess a
783  # suitable path. We only guess for missing 2 step paths.
784 
785  my %mid1;
786  my %mid2;
787 
788  foreach my $path (values(%{$self->{'_mapping_paths'}})) {
789  next if(@$path != 2);
790 
791  my $match = undef;
792 
793  if($path->[0]->equals($cs1)) {
794  $match = 1;
795  } elsif($path->[1]->equals($cs1)) {
796  $match = 0;
797  }
798 
799  if(defined($match)) {
800  my $mid = $path->[$match];
801  my $midkey = $mid->name() . ':' . $mid->version();
802 
803  # is the same cs mapped to by other cs?
804  if($mid2{$midkey}) {
805  my $path = [$cs1,$mid,$cs2];
806  $self->{'_mapping_paths'}->{"$key1|$key2"} = $path;
807  $key1 =~ s/\:$//;
808  $key2 =~ s/\:$//;
809  $midkey =~ s/\:$//;
810  warning("Using implicit mapping path between '$key1' and '$key2' " .
811  "coord systems.\n" .
812  "An explicit 'assembly.mapping' entry should be added " .
813  "to the meta table.\nExample: " .
814  "'$key1|$midkey|$key2'\n");
815  return $path;
816  } else {
817  $mid1{$midkey} = $mid;
818  }
819  }
820 
821  $match = undef;
822 
823  if($path->[0]->equals($cs2)) {
824  $match = 1;
825  } elsif($path->[1]->equals($cs2)) {
826  $match = 0;
827  }
828 
829 
830  if(defined($match)) {
831  my $mid = $path->[$match];
832  my $midkey = $mid->name() . ':' . $mid->version();
833 
834  # is the same cs mapped to by other cs?
835  if($mid1{$midkey}) {
836  my $path = [$cs2,$mid,$cs1];
837  $self->{'_mapping_paths'}->{"$key2|$key1"} = $path;
838 
839  $key1 =~ s/\:$//;
840  $key2 =~ s/\:$//;
841  $midkey =~ s/\:$//;
842  warning("Using implicit mapping path between '$key1' and '$key2' " .
843  "coord systems.\n" .
844  "An explicit 'assembly.mapping' entry should be added " .
845  "to the meta table.\nExample: " .
846  "'$key1|$midkey|$key2'\n");
847 
848  return $path;
849  } else {
850  $mid2{$midkey} = $mid;
851  }
852  }
853  }
854  }
855 
856  return $path || [];
857 }
858 
859 =head2 store_mapping_path
860 
861  Arg [1] : Bio::EnsEMBL::CoordSystem $cs1
862  Arg [2] : Bio::EnsEMBL::CoordSystem $cs2
863  Arg [3..n] : Bio::EnsEMBL::CoordSystem $cs3..$csN
864  Example : my $pathref = $csa->store_mapping_path($cs1,$cs2);
865  Description: Given two or more coordinate systems this will store
866  mapping paths between them in the database.
867 
868  For example, if $cs1 represents chrs of version
869  V1, $cs2 represents contigs, and $cs3 clones then, unless
870  they already exist, the following entries will be created
871  in the meta table;
872  +------------------+---------------------+
873  | meta_key | meta_value |
874  +------------------+---------------------+
875  | assembly.mapping | chr:V1|clone |
876  | assembly.mapping | clone|contig |
877  | assembly.mapping | chr:V1|clone|contig |
878  +------------------+---------------------+
879 
880 
881  For a one-step mapping path to be valid there needs to be
882  a relationship between the two coordinate systems defined in
883  the assembly table. Two step mapping paths work by building
884  on the one-step mapping paths which are already defined.
885 
886  The first coordinate system in a one step mapping path must
887  be the assembled coordinate system and the second must be
888  the component.
889 
890  Returntype : reference to a list of lists of new meta_value mapping strings
891  created for assembly.mapping
892  Exceptions : CoordSystems with no rank/duplicated rank
893  Caller : general
894  Status : Experimental
895 
896 =cut
897 
898 sub store_mapping_path{
899  my $self = shift;
900  my @csystems = @_;
901 
902  # Validate and sort the args
903  my %seen_ranks;
904  @csystems >= 2 or throw('Need two or more CoordSystems');
905  my $validate = sub{
906  ref($_[0]) && $_[0]->isa('Bio::EnsEMBL::CoordSystem') or
907  throw('CoordSystem argument expected.');
908  my $rank = $_[0]->rank ||
909  throw('CoordSystem has no rank: '.$_[0]->name);
910  $seen_ranks{$rank} &&
911  throw('CoordSystem '.$_[0]->name." shares rank $rank with ".
912  $seen_ranks{$rank}->name);
913  $seen_ranks{$rank} = $_[0];
914  };
915  @csystems = map{&{$validate}($_)} @csystems;
916  my ($key, @keys);
917  foreach my $cs (@csystems) {
918  $key = $cs->name();
919  if ($cs->version()) {
920  $key .= ":" . $cs->version();
921  }
922  push @keys, $key;
923  }
924 
925  # For each pair in the sorted list, store in the DB
926  my $meta = $self->db->get_MetaContainer;
927  my @retlist;
928  for( my $i=1; $i<@keys; $i++ ){
929  for( my $j=0; $j<(@keys-$i); $j++ ){
930  my $mapping = join( "|", @keys );
931 
932  my $mapping_key = join( "|", @keys );
933  # Skip existing
934  next if $self->{'_mapping_paths'}->{$mapping_key};
935 
936  # Update the database
937  $meta->store_key_value('assembly.mapping',$mapping);
938  push @retlist, $mapping;
939  }
940  }
941 
942  if( @retlist ){
943  # Update mapping path cache
944  $self->_cache_mapping_paths;
945  }
946 
947  # Return the mappings that we have just created
948  return [@retlist];
949 }
950 
951 
952 =head2 store_multiple_mapping_path
953 
954  Arg [1] : Bio::EnsEMBL::CoordSystem $cs1
955  Arg [2] : Bio::EnsEMBL::CoordSystem $cs2
956  Arg [3..n] : Bio::EnsEMBL::CoordSystem $cs3..$csN
957  Example : my $pathref = $csa->store_multiple_mapping_path($cs1,$cs2);
958  Description: Given two or more coordinate systems this will store
959  multiple mapping paths between them in the database.
960 
961  Works similarly to the store_mapping_path method
962  But will presume every coord system can be mapped in multiple
963  ways to the other coord systems
964  This is represented by the use of '#' instead of '|'
965  in the mapping key
966 
967  Returntype : reference to a list of lists of new meta_value mapping strings
968  created for assembly.mapping
969  Exceptions : CoordSystems with no rank/duplicated rank
970  Caller : general
971  Status : Experimental
972 
973 =cut
974 
975 sub store_multiple_mapping_path{
976  my $self = shift;
977  my @csystems = @_;
978 
979  # Validate and sort the args
980  my %seen_ranks;
981  @csystems >= 2 or throw('Need two or more CoordSystems');
982  my $validate = sub{
983  ref($_[0]) && $_[0]->isa('Bio::EnsEMBL::CoordSystem') or
984  throw('CoordSystem argument expected.');
985  my $rank = $_[0]->rank ||
986  throw('CoordSystem has no rank: '.$_[0]->name);
987  $seen_ranks{$rank} &&
988  throw('CoordSystem '.$_[0]->name." shares rank $rank with ".
989  $seen_ranks{$rank}->name);
990  $seen_ranks{$rank} = $_[0];
991  };
992  @csystems = map{&{$validate}($_)} @csystems;
993  my ($key, @keys);
994  foreach my $cs (@csystems) {
995  $key = $cs->name();
996  if ($cs->version()) {
997  $key .= ":" . $cs->version();
998  }
999  push @keys, $key;
1000  }
1001  # For each pair in the sorted list, store in the DB
1002  my $meta = $self->db->get_MetaContainer;
1003  my @retlist;
1004  for( my $i=1; $i<@keys; $i++ ){
1005  for( my $j=0; $j<(@keys-$i); $j++ ){
1006  my $mapping = join( "#", @keys );
1007 
1008  my $mapping_key = join( "#", @keys );
1009  # Skip existing
1010  next if $self->{'_mapping_paths'}->{$mapping_key};
1011 
1012  # Update the database
1013  $meta->store_key_value('assembly.mapping',$mapping) unless $meta->key_value_exists('assembly.mapping',$mapping);
1014  push @retlist, $mapping;
1015  }
1016  }
1017 
1018  if( @retlist ){
1019  # Update mapping path cache
1020  $self->_cache_mapping_paths;
1021  }
1022 
1023  # Return the mappings that we have just created
1024  return [@retlist];
1025 }
1026 
1027 
1028 
1029 =head2 fetch_by_attrib
1030 
1031  Arg [1] : string attrib
1032  Arg [2] : (optional) string version
1033  Example : $csa->fetch_by_attrib('default_version','NCBIM37');
1034  Description: Retrieves a CoordSystem object from the database that have the specified
1035  attrib and version, if no version is specified, returns the default version
1036  Returntype : Bio::EnsEMBL::CoordSystem object
1037  Exceptions : throw when attrib not present
1038  Caller : general
1039  Status : Stable
1040 
1041 =cut
1042 
1043 sub fetch_by_attrib {
1044  my $self = shift;
1045  my $attrib = shift;
1046  my $version = shift;
1047 
1048  $version = lc($version) if($version);
1049 
1050  my @dbIDs = keys %{$self->{"_is_$attrib"}};
1051 
1052  throw("No $attrib coordinate system defined") if(!@dbIDs);
1053 
1054  foreach my $dbID (@dbIDs) {
1055  my $cs = $self->{'_dbID_cache'}->{$dbID};
1056  if($version) {
1057  return $cs if(lc($version) eq $cs->version());
1058  } elsif($self->{'_is_default_version'}->{$dbID}) {
1059  return $cs;
1060  }
1061  }
1062 
1063  #specifically requested attrib system was not found
1064  if($version) {
1065  throw("$attrib coord_system with version [$version] does not exist");
1066  }
1067 
1068  #coordsystem with attrib exists but no default is defined:
1069  my $dbID = shift @dbIDs;
1070  my $cs = $self->{'_dbID_cache'}->{$dbID};
1071  my $v = $cs->version();
1072  warning("No default version for $attrib coord_system exists. " .
1073  "Using version [$v] arbitrarily");
1074 
1075  return $cs;
1076 }
1077 
1078 
1079 =head2 fetch_all_by_attrib
1080 
1081  Arg [1] : string attrib
1082  Example : $csa->fetch_all_by_attrib('default_version');
1083  Description: Retrieves all CoordSystem object from the database that have the specified
1084  attrib.
1085  Returntype : reference to a list of Bio::EnsEMBL::CoordSystem objects
1086  Exceptions : throw when attrib not present
1087  Caller : general
1088  Status : Stable
1089 
1090 =cut
1091 
1092 sub fetch_all_by_attrib {
1093  my $self = shift;
1094  my $attrib = shift;
1095 
1096  my @coord_systems = ();
1097  foreach my $dbID (keys %{$self->{"_is_$attrib"}}) {
1098  push @coord_systems, $self->{"_dbID_cache"}->{$dbID};
1099  }
1100 
1101  return \@coord_systems;
1102 }
1103 
1104 =head2 store
1105 
1106  Arg [1] : Bio::EnsEMBL::CoordSystem
1107  Example : $csa->store($coord_system);
1108  Description: Stores a CoordSystem object in the database.
1109  Returntype : none
1110  Exceptions : Warning if CoordSystem is already stored in this database.
1111  Caller : none
1112  Status : Stable
1113 
1114 =cut
1115 
1116 sub store {
1117  my $self = shift;
1118  my $cs = shift;
1119 
1120  if(!$cs || !ref($cs) || !$cs->isa('Bio::EnsEMBL::CoordSystem')) {
1121  throw('CoordSystem argument expected.');
1122  }
1123 
1124  my $db = $self->db();
1125  my $name = $cs->name();
1126  my $version = $cs->version();
1127  my $rank = $cs->rank();
1128 
1129  my $seqlevel = $cs->is_sequence_level();
1130  my $default = $cs->is_default();
1131 
1132  my $toplevel = $cs->is_top_level();
1133 
1134  if($toplevel) {
1135  throw("The toplevel CoordSystem cannot be stored");
1136  }
1137 
1138  #
1139  # Do lots of sanity checking to prevent bad data from being entered
1140  #
1141 
1142  if($cs->is_stored($db)) {
1143  warning("CoordSystem $name $version is already in db.\n");
1144  return;
1145  }
1146 
1147  if($name eq 'toplevel' || $name eq 'seqlevel' || !$name) {
1148  throw("[$name] is not a valid name for a CoordSystem.");
1149  }
1150 
1151  if($seqlevel && keys(%{$self->{'_is_sequence_level'}})) {
1152  throw("There can only be one sequence level CoordSystem.");
1153  }
1154 
1155  if(exists $self->{'_name_cache'}->{lc($name)}) {
1156  my @coord_systems = @{$self->{'_name_cache'}->{lc($name)}};
1157  foreach my $c (@coord_systems) {
1158  if(lc($c->version()) eq lc($version)) {
1159  warning("CoordSystem $name $version is already in db.\n");
1160  return;
1161  }
1162  if($default && $self->{'_is_default_version'}->{$c->dbID()}) {
1163  throw("There can only be one default version of CoordSystem $name");
1164  }
1165  }
1166  }
1167 
1168  if($rank !~ /^\d+$/) {
1169  throw("Rank attribute must be a positive integer not [$rank]");
1170  }
1171  if($rank == 0) {
1172  throw("Only toplevel CoordSystem may have rank of 0.");
1173  }
1174 
1175  if(defined($self->{'_rank_cache'}->{$rank})) {
1176  throw("CoordSystem with rank [$rank] already exists.");
1177  }
1178 
1179  my @attrib;
1180 
1181  push @attrib, 'default_version' if($default);
1182  push @attrib, 'sequence_level' if($seqlevel);
1183 
1184  my $attrib_str = (@attrib) ? join(',', @attrib) : undef;
1185 
1186  #
1187  # store the coordinate system in the database
1188  #
1189 
1190  my $sth =
1191  $db->dbc->prepare( 'INSERT INTO coord_system '
1192  . '( `name`, `version`, `attrib`, `rank`, `species_id` ) '
1193  . 'VALUES ( ?, ?, ?, ?, ? )' );
1194 
1195  $sth->bind_param( 1, $name, SQL_VARCHAR );
1196  $sth->bind_param( 2, $version, SQL_VARCHAR );
1197  $sth->bind_param( 3, $attrib_str, SQL_VARCHAR );
1198  $sth->bind_param( 4, $rank, SQL_INTEGER );
1199  $sth->bind_param( 5, $self->species_id(), SQL_INTEGER );
1200 
1201  $sth->execute();
1202  my $dbID = $self->last_insert_id('coord_system_id', undef, 'coord_system');
1203  $sth->finish();
1204 
1205  if(!$dbID) {
1206  throw("Did not get dbID from store of CoordSystem.");
1207  }
1208 
1209  $cs->dbID($dbID);
1210  $cs->adaptor($self);
1211 
1212  #
1213  # update the internal caches that are used for fetching
1214  #
1215  $self->{'_is_default_version'}->{$dbID} = 1 if($default);
1216  $self->{'_is_sequence_level'}->{$dbID} = 1 if($seqlevel);
1217 
1218  $self->{'_name_cache'}->{lc($name)} ||= [];
1219  push @{$self->{'_name_cache'}->{lc($name)}}, $cs;
1220 
1221  $self->{'_dbID_cache'}->{$dbID} = $cs;
1222  $self->{'_rank_cache'}->{$rank} = $cs;
1223 
1224  return $cs;
1225 }
1226 
1227 =head2 remove
1228 
1229  Arg [1] : Bio::EnsEMBL::CoordSystem
1230  Example : $csa->remove($coord_system);
1231  Description: Removes a CoordSystem object from the database.
1232  Returntype : none
1233  Exceptions : Warning if CoordSystem is not stored in this database.
1234  Caller : none
1235  Status : Stable
1236 
1237 =cut
1238 
1239 sub remove {
1240  my $self = shift;
1241  my $cs = shift;
1242 
1243  if(!$cs || !ref($cs) || !$cs->isa('Bio::EnsEMBL::CoordSystem')) {
1244  throw('CoordSystem argument expected.');
1245  }
1246 
1247  my $db = $self->db();
1248  my $name = $cs->name();
1249  my $version = $cs->version();
1250  my $dbID = $cs->dbID();
1251  my $rank = $cs->rank();
1252 
1253  #
1254  # Do lots of sanity checking to prevent bad data from being entered
1255  #
1256 
1257  if(!$cs->is_stored($db)) {
1258  warning("CoordSystem $name $version does not exist in db.\n");
1259  return;
1260  }
1261 
1262  if($name eq 'toplevel' || $name eq 'seqlevel' || !$name) {
1263  throw("[$name] is not a valid name for a CoordSystem.");
1264  }
1265 
1266  #
1267  # remove the coordinate system from in the database
1268  #
1269 
1270  my $sql = "DELETE FROM coord_system WHERE name = ?";
1271  if ($version) {
1272  $sql .= " AND version = ?";
1273  }
1274 
1275  my $sth = $db->dbc->prepare($sql);
1276 
1277  $sth->bind_param( 1, $name, SQL_VARCHAR );
1278  $sth->bind_param( 2, $version, SQL_VARCHAR ) if $version;
1279 
1280  $sth->execute();
1281  $sth->finish();
1282 
1283  delete $self->{'_name_cache'}->{lc($name)};
1284 
1285  delete $self->{'_dbID_cache'}->{$dbID};
1286  delete $self->{'_rank_cache'}->{$rank};
1287  delete $self->{'_is_sequence_level'}->{$dbID};
1288  delete $self->{'_is_default_version'}->{$dbID};
1289  $cs->dbID(undef);
1290  $cs->adaptor(undef);
1291 
1292  return $cs;
1293 }
1294 
1295 1;
Bio::EnsEMBL::Registry::get_adaptor
public Adaptor get_adaptor()
EnsEMBL
Definition: Filter.pm:1
map
public map()
Bio::EnsEMBL::CoordSystem::name
public String name()
Bio::EnsEMBL::CoordSystem::new
public Bio::EnsEMBL::CoordSystem new()
Bio::EnsEMBL::CoordSystem
Definition: CoordSystem.pm:40
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
Bio::EnsEMBL::DBSQL::BaseAdaptor::prepare
public DBI::StatementHandle prepare()
Bio::EnsEMBL::CoordSystem::version
public String version()
Bio::EnsEMBL::CoordSystem::rank
public Int rank()
Bio::EnsEMBL::DBSQL::BaseAdaptor
Definition: BaseAdaptor.pm:71
Bio::EnsEMBL::Registry::load_registry_from_db
public Int load_registry_from_db()
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::DBSQL::CoordSystemAdaptor
Definition: CoordSystemAdaptor.pm:91