ensembl-hive  2.7.0
StableIdHistoryTree.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::StableIdHistoryTree - object representing a stable ID history tree
34 
35 =head1 SYNOPSIS
36 
37  my $registry = "Bio::EnsEMBL::Registry";
38  my $archiveStableIdAdaptor =
39  $registry->get_adaptor( 'human', 'core', 'ArchiveStableId' );
40 
41  my $stable_id = 'ENSG00000068990';
42  my $history =
43  $archiveStableIdAdaptor->fetch_history_tree_by_stable_id('ENSG01');
44 
45  print "Unique stable IDs in this tree:\n";
46  print join( ", ", @{ $history->get_unique_stable_ids } ), "\n";
47 
48  print "\nReleases in this tree:\n";
49  print join( ", ", @{ $history->get_release_display_names } ), "\n";
50 
51  print "\nCoordinates of nodes in the tree:\n\n";
52  foreach my $a ( @{ $history->get_all_ArchiveStableIds } ) {
53  print " Stable ID: " . $a->stable_id . "." . $a->version . "\n";
54  print " Release: "
55  . $a->release . " ("
56  . $a->assembly . ", "
57  . $a->db_name . ")\n";
58  print " coords: "
59  . join( ', ', @{ $history->coords_by_ArchiveStableId($a) } )
60  . "\n\n";
61  }
62 
63 =head1 DESCRIPTION
64 
65 This object represents a stable ID history tree graph.
66 
67 The graph is implemented as a collection of nodes (ArchiveStableId
68 objects) and links (StableIdEvent objects) which have positions
69 on an (x,y) grid. The x axis is used for releases, the y axis for
70 stable_ids. The idea is to create a plot similar to this (the numbers
71 shown on the nodes are the stable ID versions):
72 
73  ENSG001 1-------------- 2--
74  \
75  ENSG003 1-----1
76  /
77  ENSG002 1-------2----------
78 
79  38 39 40 41 42
80 
81 The grid coordinates of the ArchiveStableId objects in this example
82 would be (note that coordinates are zero-based):
83 
84  ENSG001.1 (0, 0)
85  ENSG001.2 (2, 0)
86  ENSG003.1 (release 41) (3, 1)
87  ENSG003.1 (release 42) (4, 1)
88  ENSG002.1 (0, 2)
89  ENSG002.2 (1, 2)
90 
91 The tree will only contain those nodes which had a change in the stable
92 ID version. Therefore, in the above example, in release 39 ENSG001 was
93 present and had version 1 (but will not be drawn there, to unclutter the
94 output).
95 
96 The grid positions will be calculated by the API and will try to
97 untangle the tree (i.e. try to avoid overlapping lines).
98 
99 =head1 METHODS
100 
101  new
102  add_ArchiveStableIds
103  add_ArchiveStableIds_for_events
104  remove_ArchiveStableId
105  flush_ArchiveStableIds
106  add_StableIdEvents
107  remove_StableIdEvent
108  flush_StableIdEvents
109  get_all_ArchiveStableIds
110  get_all_StableIdEvents
111  get_latest_StableIdEvent
112  get_release_display_names
113  get_release_db_names
114  get_unique_stable_ids
115  optimise_tree
116  coords_by_ArchiveStableId
117  calculate_coords
118  consolidate_tree
119  reset_tree
120  current_dbname
121  current_release
122  current_assembly
123  is_incomplete
124 
125 =head1 RELATED MODULES
126 
130 
131 =cut
132 
133 package Bio::EnsEMBL::StableIdHistoryTree;
134 
135 use strict;
136 use warnings;
137 no warnings 'uninitialized';
138 
139 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
140 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
141 use Time::HiRes;
142 
143 
144 =head2 new
145 
146  Arg [CURRENT_DBNAME] : (optional) name of current db
147  Arg [CURRENT_RELEASE] : (optional) current release number
148  Arg [CURRENT_ASSEMBLY] : (optional) current assembly name
149  Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
150  Description : object constructor
152  Exceptions : none
153  Caller : general
154  Status : At Risk
155  : under development
156 
157 =cut
158 
159 sub new {
160  my $caller = shift;
161  my $class = ref($caller) || $caller;
162 
163  my $self = {};
164  bless $self, $class;
165 
166  my ($current_dbname, $current_release, $current_assembly) =
167  rearrange([qw( CURRENT_DBNAME CURRENT_RELEASE CURRENT_ASSEMBLY )], @_ );
168 
169  # initialise
170  $self->{'current_dbname'} = $current_dbname;
171  $self->{'current_release'} = $current_release;
172  $self->{'current_assembly'} = $current_assembly;
173 
174  return $self;
175 }
176 
177 
178 =head2 add_ArchiveStableIds
179 
180  Arg[1..n] : Bio::EnsEMBL::ArchiveStableId's @archive_ids
181  The ArchiveStableIds to add to the the history tree
182  Example : my $archive_id = $archiveStableIdAdaptor->fetch_by_stable_id(
183  'ENSG00024808');
184  $history->add_ArchiveStableId($archive_id);
185  Description : Adds ArchiveStableIds (nodes) to the history tree. No
186  calculation of grid coordinates is done at this point, you need
187  to initiate this manually with calculate_coords().
188  ArchiveStableIds are only added once for each release (to avoid
189  duplicates).
190  Return type : none
191  Exceptions : thrown on invalid or missing argument
192  Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
193  Status : At Risk
194  : under development
195 
196 =cut
197 
198 sub add_ArchiveStableIds {
199  my ($self, @archive_ids) = @_;
200 
201  throw("You must provide one or more Bio::EnsEMBL::ArchiveStableIds to add.")
202  unless (@archive_ids);
203 
204  foreach my $archive_id (@archive_ids) {
205  throw("Bio::EnsEMBL::ArchiveStableId object expected.")
206  unless (ref($archive_id) &&
207  $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
208 
209  $self->{'nodes'}->{$self->_node_id($archive_id)} = $archive_id;
210  }
211 }
212 
213 
214 =head2 add_ArchiveStableIds_for_events
215 
216  Example : my $history = Bio::EnsEMBL::StableIdHistoryTree->new;
217  $history->add_StableIdEvents($event1, $event2);
218  $history->add_ArchiveStableIds_for_events;
219  Description : Convenience method that adds all ArchiveStableIds for all
220  StableIdEvents attached to this object to the tree.
221  Return type : none
222  Exceptions : none
223  Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
224  Status : At Risk
225  : under development
226 
227 =cut
228 
229 sub add_ArchiveStableIds_for_events {
230  my $self = shift;
231 
232  foreach my $event (@{ $self->get_all_StableIdEvents }) {
233  if ($event->old_ArchiveStableId) {
234  $self->add_ArchiveStableIds($event->old_ArchiveStableId);
235  }
236  if ($event->new_ArchiveStableId) {
237  $self->add_ArchiveStableIds($event->new_ArchiveStableId);
238  }
239  }
240 }
241 
242 
243 =head2 remove_ArchiveStableId
244 
245  Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id
246  the ArchiveStableId to remove from the tree
247  Example : $history->remove_ArchiveStableId($archive_id);
248  Description : Removes an ArchiveStableId from the tree.
249  Return type : none
250  Exceptions : thrown on missing or invalid argument
251  Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
252  Status : At Risk
253  : under development
254 
255 =cut
256 
257 sub remove_ArchiveStableId {
258  my ($self, $archive_id) = @_;
259 
260  throw("Bio::EnsEMBL::ArchiveStableId object expected.")
261  unless ($archive_id && ref($archive_id) &&
262  $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
263 
264  my %nodes = %{ $self->{'nodes'} };
265  delete $nodes{$self->_node_id($archive_id)};
266  $self->{'nodes'} = \%nodes;
267 }
268 
269 
270 =head2 flush_ArchiveStableIds
271 
272  Example : $history->flush_ArchiveStableIds;
273  Description : Remove all ArchiveStableIds from the tree.
274  Return type : none
275  Exceptions : none
276  Caller : general
277  Status : At Risk
278  : under development
279 
280 =cut
281 
282 sub flush_ArchiveStableIds {
283  my $self = shift;
284  $self->{'nodes'} = undef;
285 }
286 
287 
288 #
289 # generate a unique node identifier
290 #
291 sub _node_id {
292  my ($self, $archive_id) = @_;
293  return $archive_id->stable_id . ':' . $archive_id->db_name;
294 }
295 
296 
297 =head2 add_StableIdEvents
298 
299  Arg[1..n] : Bio::EnsEMBL::StableIdEvent's @events
300  The StableIdEvents to add to the the history tree
301  Example : $history->add_StableIdEvents($event);
302  Description : Adds StableIdEvents (links) to the history tree. Note that
303  ArchiveStableIds attached to the StableIdEvent aren't added to
304  the tree automatically, you'll need to call
305  add_ArchiveStableIds_for_events later.
306  Return type : none
307  Exceptions : thrown on invalid or missing argument
308  Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
309  Status : At Risk
310  : under development
311 
312 =cut
313 
314 sub add_StableIdEvents {
315  my ($self, @events) = @_;
316 
317  throw("You must provide one or more Bio::EnsEMBL::StableIdsEvents to add.")
318  unless (@events);
319 
320  foreach my $event (@events) {
321  throw("Bio::EnsEMBL::StableIdEvent object expected.")
322  unless ($event->isa('Bio::EnsEMBL::StableIdEvent'));
323 
324  $self->{'links'}->{$self->_link_id($event)} = $event;
325  }
326 }
327 
328 
329 =head2 remove_StableIdEvent
330 
331  Arg[1] : Bio::EnsEMBL::StableIdEvent $event
332  the StableIdEvent to remove from the tree
333  Example : $history->remove_StableIdEvent($event);
334  Description : Removes a StableIdEvent from the tree.
335  Return type : none
336  Exceptions : thrown on missing or invalid arguments
337  Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::fetch_history_by_stable_id, general
338  Status : At Risk
339  : under development
340 
341 =cut
342 
343 sub remove_StableIdEvent {
344  my ($self, $event) = @_;
345 
346  throw("Bio::EnsEMBL::StableIdEvent object expected.") unless
347  ($event && ref($event) && $event->isa('Bio::EnsEMBL::StableIdEvent'));
348 
349  my %links = %{ $self->{'links'} };
350  delete $links{$self->_link_id($event)};
351  $self->{'links'} = \%links;
352 }
353 
354 
355 =head2 flush_StableIdEvents
356 
357  Example : $history->flush_StableIdEvents;
358  Description : Removes all StableIdEvents from the tree.
359  Return type : none
360  Exceptions : none
361  Caller : general
362  Status : At Risk
363  : under development
364 
365 =cut
366 
367 sub flush_StableIdEvents {
368  my $self = shift;
369  $self->{'links'} = undef;
370 }
371 
372 
373 #
374 # generate a unique link identifier
375 #
376 sub _link_id {
377  my ($self, $event) = @_;
378 
379  my ($old_id, $old_db_name, $new_id, $new_db_name);
380  if ($event->old_ArchiveStableId) {
381  $old_id = $event->old_ArchiveStableId->stable_id;
382  $old_db_name = $event->old_ArchiveStableId->db_name;
383  }
384  if ($event->new_ArchiveStableId) {
385  $new_id = $event->new_ArchiveStableId->stable_id;
386  $new_db_name = $event->new_ArchiveStableId->db_name;
387  }
388 
389  return join(':', $old_id, $old_db_name, $new_id, $new_db_name);
390 }
391 
392 
393 =head2 get_all_ArchiveStableIds
394 
395  Example : foreach my $arch_id (@{ $history->get_all_ArchiveStableIds }) {
396  print $arch_id->stable_id, '.', $arch_id->version, "\n";
397  }
398  Description : Gets all ArchiveStableIds (nodes) in this tree.
399  Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
400  Exceptions : none
401  Caller : general
402  Status : At Risk
403  : under development
404 
405 =cut
406 
407 sub get_all_ArchiveStableIds {
408  my $self = shift;
409  return [ values %{ $self->{'nodes'} } ];
410 }
411 
412 
413 =head2 get_all_current_ArchiveStableIds
414 
415  Example : foreach my $arch_id (@{ $history->get_all_current_ArchiveStableIds }) {
416  print $arch_id->stable_id, '.', $arch_id->version, "\n";
417  }
418  Description : Convenience method to get all current ArchiveStableIds in this
419  tree.
420 
421  Note that no lazy loading of "current" status is done at that
422  stage; as long as you retrieve your StableIdHistoryTree object
423  from ArchiveStableIdAdaptor, you'll get the right answer. In
424  other use cases, if you want to make sure you really get all
425  current stable IDs, loop over the result of
426  get_all_ArchiveStableIds() and call
427  ArchiveStableId->current_version() on all of them.
428  Return type : Arrayref of Bio::EnsEMBL::ArchiveStableId objects
429  Exceptions : none
430  Caller : general
431  Status : At Risk
432  : under development
433 
434 =cut
435 
436 sub get_all_current_ArchiveStableIds {
437  my $self = shift;
438 
439  my @current = ();
440 
441  foreach my $arch_id (@{ $self->get_all_ArchiveStableIds }) {
442  push @current, $arch_id if ($arch_id->is_current);
443  }
444 
445  return \@current;
446 }
447 
448 
449 =head2 get_all_StableIdEvents
450 
451  Example : foreach my $event (@{ $history->get_all_StableIdsEvents }) {
452  print "Old stable ID: ",
453  ($event->get_attribute('old', 'stable_id') or 'none'), "\n";
454  print "New stable ID: ",
455  ($event->get_attribute('new', 'stable_id') or 'none'), "\n";
456  print "Mapping score: ", $event->score, "\n";
457  }
458  Description : Gets all StableIdsEvents (links) in this tree.
459  Return type : Arrayref of Bio::EnsEMBL::StableIdEvent objects
460  Exceptions : none
461  Caller : general
462  Status : At Risk
463  : under development
464 
465 =cut
466 
467 sub get_all_StableIdEvents {
468  my $self = shift;
469  return [ values %{ $self->{'links'} } ];
470 }
471 
472 
473 =head2 get_latest_StableIdEvent
474 
475  Arg[1] : Bio::EnsEMBL::ArchiveStableId $arch_id - the stable ID to get
476  the latest Event for
477  Example : my $arch_id = Bio::EnsEMBL::ArchiveStableId->new(
478  -stable_id => 'ENSG00001'
479  );
480  my $event = $history->get_latest_Event($arch_id);
481  Description : Returns the latest StableIdEvent found in the tree where a given
482  stable ID is the new stable ID. If more than one is found (e.g.
483  in a merge scenario in the latest mapping), preference is given
484  to self-events.
485  Return type : Bio::EnsEMBL::StableIdEvent
486  Exceptions : thrown on missing or wrong argument
487  Caller : Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor::add_all_current_to_history, general
488  Status : At Risk
489  : under development
490 
491 =cut
492 
493 sub get_latest_StableIdEvent {
494  my $self = shift;
495  my $arch_id = shift;
496 
497  unless ($arch_id and $arch_id->isa('Bio::EnsEMBL::ArchiveStableId')) {
498  throw("Need a Bio::EnsEMBL::ArchiveStableId.");
499  }
500 
501  my @all_events = @{ $self->get_all_StableIdEvents };
502  my @self_events = ();
503 
504  while (my $event = shift(@all_events)) {
505  if ($event->new_ArchiveStableId and
506  $event->new_ArchiveStableId->stable_id eq $arch_id->stable_id) {
507  push @self_events, $event;
508  }
509  }
510 
511  my @sorted = sort { $b->new_ArchiveStableId->release <=>
512  $a->new_ArchiveStableId->release } @self_events;
513 
514  # give priority to self events
515  my $latest;
516  while ($latest = shift @sorted) {
517  last if (($latest->old_ArchiveStableId and
518  $latest->old_ArchiveStableId->stable_id eq $arch_id->stable_id)
519  or !$latest->old_ArchiveStableId);
520  }
521 
522  return $latest;
523 }
524 
525 
526 =head2 get_release_display_names
527 
528  Example : print "Unique release display_names in this tree:\n"
529  foreach my $name (@{ $history->get_release_display_names }) {
530  print " $name\n";
531  }
532  Description : Returns a chronologically sorted list of unique release
533  display_names in this tree.
534 
535  This method can be used to determine the number of columns when
536  plotting the history tree.
537  Return type : Arrayref of strings.
538  Exceptions : none
539  Caller : general
540  Status : At Risk
541  : under development
542 
543 =cut
544 
545 sub get_release_display_names {
546  my $self = shift;
547 
548  my @display_names = map { $_->[1] } @{ $self->_sort_releases };
549 
550  return \@display_names;
551 }
552 
553 
554 =head2 get_release_db_names
555 
556  Example : print "Unique release db_names in this tree:\n"
557  foreach my $name (@{ $history->get_release_db_names }) {
558  print " $name\n";
559  }
560  Description : Returns a chronologically sorted list of unique release
561  db_names in this tree.
562  Return type : Arrayref of strings.
563  Exceptions : none
564  Caller : general
565  Status : At Risk
566  : under development
567 
568 =cut
569 
570 sub get_release_db_names {
571  my $self = shift;
572 
573  my @db_names = map { $_->[0] } @{ $self->_sort_releases };
574 
575  return \@db_names;
576 }
577 
578 
579 #
580 # Create a chronologically sorted list of releases.
581 #
582 # Return type : Arrayref of arrayrefs (db_name, release)
583 #
584 sub _sort_releases {
585  my $self = shift;
586 
587  unless ($self->{'sorted_tree'}->{'releases'}) {
588 
589  my %unique = ();
590 
591  foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
592  $unique{join(':', $archive_id->db_name, $archive_id->release)} = 1;
593  }
594 
595  # sort releases by release number, then db_name; this should get them into
596  # chronological order
597  my @releases = sort { $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] }
598  map { [ split(/:/, $_) ] } keys(%unique);
599 
600  $self->{'sorted_tree'}->{'releases'} = \@releases;
601 
602  }
603 
604  return $self->{'sorted_tree'}->{'releases'};
605 }
606 
607 
608 =head2 get_unique_stable_ids
609 
610  Example : print "Unique stable IDs in this tree:\n"
611  foreach my $id (@{ $history->get_unique_stable_ids }) {
612  print " $id\n";
613  }
614  Description : Returns a list of unique stable IDs in this tree. Version is not
615  taken into account here. This method can be used to determine
616  the number of rows when plotting the history with each stable ID
617  occupying one line.
618 
619  Sort algorithm will depend on what was chosen when the sorted
620  tree was generated. This ranges from a simple alphanumeric sort
621  to algorithms trying to untangle the history tree. If no
622  pre-sorted data is found, an alphanumerically sorted list will
623  be returned by default.
624  Return type : Arrayref of strings.
625  Exceptions : none
626  Caller : general
627  Status : At Risk
628  : under development
629 
630 =cut
631 
632 sub get_unique_stable_ids {
633  my $self = shift;
634 
635  unless ($self->{'sorted_tree'}->{'stable_ids'}) {
636  $self->{'sorted_tree'}->{'stable_ids'} = $self->_sort_stable_ids;
637  }
638 
639  return $self->{'sorted_tree'}->{'stable_ids'};
640 }
641 
642 
643 #
644 # Returns a list of stable IDs in this history tree, sorted alphabetically.
645 # This is the simplest sort function used and doesn't try to untangle the tree.
646 #
647 # Return type : Arrayref
648 #
649 sub _sort_stable_ids {
650  my $self = shift;
651  my %unique = map { $_->stable_id => 1 } @{ $self->get_all_ArchiveStableIds };
652  return [sort keys %unique];
653 }
654 
655 
656 =head2 optimise_tree
657 
658  Arg [1] : (optional) Float $time_limit
659  Optimise tree normally runs until it hits a minimised state
660  but this can take a very long time. Therefore you can
661  opt to bail out of the optimisation early. Specify the
662  time in seconds. Floating point values are supported should you
663  require sub-second limits
664  Example : $history->optimise_tree;
665  Description : This method sorts the history tree so that the number of
666  overlapping branches is minimised (thus "untangling" the tree).
667 
668  It uses a clustering algorithm for this which iteratively moves
669  the nodes with the largest vertical distance next to each other
670  and looking for a mininum in total branch length. This might not
671  produce the overall optimum but usually converges on a local
672  optimum very quickly.
673  Return type : none
674  Exceptions : none
675  Caller : calculate_coords
676  Status : At Risk
677  : under development
678 
679 =cut
680 
681 sub optimise_tree {
682  my $self = shift;
683  my $time_limit = shift;
684 
685  # get all non-self events
686  my @links;
687  foreach my $event (@{ $self->get_all_StableIdEvents }) {
688  next unless ($event->old_ArchiveStableId and $event->new_ArchiveStableId);
689  my $old_id = $event->old_ArchiveStableId->stable_id;
690  my $new_id = $event->new_ArchiveStableId->stable_id;
691  push @links, [$old_id, $new_id] if ($old_id ne $new_id);
692  }
693 
694  # get initial list of sorted unique stable IDs and put them into a position
695  # lookup hash
696  my $i = 0;
697  my %pos = map { $_ => $i++ } @{ $self->_sort_stable_ids };
698 
699  my $opt_length;
700  my $successive_fails = 0;
701  my $k = 0;
702  my %seen;
703 
704  # for debug purposes:
705  # find the number of permutations for the given number of stable IDs
706  my $fact = $self->_factorial(scalar(keys %pos));
707 
708  my $starting_time = Time::HiRes::time();
709 
710  OPT:
711  while ($successive_fails < 100) {
712 
713  if(defined $time_limit) {
714  my $current_time = Time::HiRes::time();
715  my $diff = $current_time - $starting_time;
716  last OPT if $diff > $time_limit;
717  }
718 
719  # sort links by vertical distance
720  #warn "sorting\n";
721  $self->_sort_links(\@links, \%pos);
722 
723  # loop over sorted links
724  SORTED:
725  foreach my $link (@links) {
726 
727  #warn " trying ".join('-', @$link)."\n";
728 
729  $k++;
730 
731  # remember last sort order
732  my %last = %pos;
733 
734  #my $this_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
735  #warn " before $this_order\n";
736 
737  # try both to move bottom node next to top node's current position and
738  # top node next to bottom node's position - one of the methods might give
739  # you better results
740  DIRECT:
741  foreach my $direction (qw(up down)) {
742 
743  # move the nodes next to each other
744  $self->_move_nodes($link, \%pos, $direction);
745 
746  # next if we've seen this sort order before
747  my $new_order = join(':', sort { $pos{$a} <=> $pos{$b} } keys %pos);
748  #warn " after ($direction) $new_order\n";
749  if ($seen{$new_order}) {
750  #warn " seen\n";
751  %pos = %last;
752  next DIRECT;
753  }
754  $seen{$new_order} = 1;
755 
756  # calculate total link length for this sort order
757  my $total_length = $self->_total_link_length(\@links, \%pos);
758 
759  if (!$opt_length or $total_length < $opt_length) {
760  #warn " better ($total_length/$opt_length)\n";
761  $opt_length = $total_length;
762  $successive_fails = 0;
763  next OPT;
764  } else {
765  #warn " worse ($total_length/$opt_length)\n";
766  %pos = %last;
767  $successive_fails++;
768  }
769  }
770 
771  }
772 
773  last OPT;
774 
775  }
776 
777  #warn "Needed $k tries (of $fact) to find optimal tree.\n";
778 
779  my @best = sort { $pos{$a} <=> $pos{$b} } keys %pos;
780  $self->{'sorted_tree'}->{'stable_ids'} = \@best;
781 }
782 
783 
784 #
785 # find the number of permutations for a give array size.
786 # used for debugging code (compare implemented algorithm to looping over all
787 # possible permutations).
788 #
789 sub _factorial {
790  my ($self, $n) = @_;
791  my $s = 1;
792  $s *= $n-- while $n > 0;
793  return $s;
794 }
795 
796 
797 #
798 # sort links by vertical distance
799 #
800 sub _sort_links {
801  my ($self, $links, $pos) = @_;
802 
803  my @lookup;
804 
805  foreach my $link (@$links) {
806  my $dist = $pos->{$link->[0]} - $pos->{$link->[1]};
807  $dist = -$dist if ($dist < 0);
808  push @lookup, [$dist, $link];
809  #warn " $dist ".join(' ', @$link)."\n";
810  }
811 
812  @$links = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @lookup;
813 }
814 
815 
816 #
817 # make two nodes adjacent by moving the second node next to the first node
818 # all other node coordinates are adjusted accordingly
819 #
820 sub _move_nodes {
821  my ($self, $link, $pos, $direction) = @_;
822 
823  my $first_pos = $pos->{$link->[0]};
824  my $second_pos = $pos->{$link->[1]};
825 
826  # swap positions if necessary
827  if ($first_pos > $second_pos) {
828  my $tmp = $second_pos;
829  $second_pos = $first_pos;
830  $first_pos = $tmp;
831  }
832  #warn " $first_pos:$second_pos\n";
833 
834  foreach my $p (keys %$pos) {
835 
836  my $val = $pos->{$p};
837 
838  #warn " $p $val\n";
839  if ($direction eq 'up') {
840  if ($val > $first_pos and $val < $second_pos) {
841  $val++;
842  } elsif ($val == $second_pos) {
843  $val = $first_pos + 1;
844  }
845  } else {
846  if ($val > $first_pos and $val < $second_pos) {
847  $val--;
848  } elsif ($val == $first_pos) {
849  $val = $second_pos - 1;
850  }
851  }
852 
853  #warn " $p $val\n";
854  $pos->{$p} = $val;
855  #warn "\n";
856  }
857 }
858 
859 
860 #
861 # calculate the total link (vertical distance) length based on this sort order
862 #
863 sub _total_link_length {
864  my ($self, $links, $pos) = @_;
865 
866  my $total_length;
867 
868  foreach my $link (@$links) {
869  my $length = $pos->{$link->[0]} - $pos->{$link->[1]};
870  $length = -$length if ($length < 0);
871  $total_length += $length;
872  }
873 
874  return $total_length;
875 }
876 
877 
878 =head2 coords_by_ArchiveStableId
879 
880  Arg[1] : Bio::EnsEMBL::ArchiveStableId $archive_id
881  The ArchiveStableId to get tree grid coordinates for
882  Example : my ($x, $y) =
883  @{ $history->coords_by_ArchiveStableId($archive_id) };
884  print $archive_id->stable_id, " coords: $x, $y\n";
885  Description : Returns the coordinates of an ArchiveStableId in the history
886  tree grid. If the ArchiveStableId isn't found in this tree, an
887  empty list is returned.
888 
889  Coordinates are zero-based (i.e. the top leftmost element in
890  the grid has coordinates [0, 0], not [1, 1]). This is to
891  facilitate using them to create a matrix as a two-dimensional
892  array of arrays.
893  Return type : Arrayref (x coordinate, y coordinate)
894  Exceptions : thrown on wrong argument type
895  Caller : general
896  Status : At Risk
897  : under development
898 
899 =cut
900 
901 sub coords_by_ArchiveStableId {
902  my ($self, $archive_id) = @_;
903 
904  throw("Bio::EnsEMBL::ArchiveStableId object expected.")
905  unless ($archive_id and ref($archive_id) and
906  $archive_id->isa('Bio::EnsEMBL::ArchiveStableId'));
907 
908  return $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)}
909  || [];
910 }
911 
912 
913 =head2 calculate_coords
914 
915  Arg [1] : (optional) Float $time_limit
916  Optimise tree normally runs until it hits a minimised state
917  but this can take a very long time. Therefore you can
918  opt to bail out of the optimisation early. Specify the
919  time in seconds. Floating point values are supported should you
920  require sub-second limits
921  Example : $history->calculate_coords;
922  Description : Pre-calculates the grid coordinates of all nodes in the tree.
923  Return type : none
924  Exceptions : none
925  Caller : ArchiveStableIdAdaptor::fetch_history_by_stable_id
926  Status : At Risk
927  : under development
928 
929 =cut
930 
931 sub calculate_coords {
932  my $self = shift;
933  my $time_limit = shift;
934 
935  # reset any previous tree cordinate calculations
936  $self->reset_tree;
937 
938  # the "master" information for the sorted tree is stored as the sorted lists
939  # of releases (x) and stable IDs (y). Sort them now.
940  my $db_names = $self->get_release_db_names;
941 
942  # untangle tree by sorting stable IDs appropriately
943  $self->optimise_tree($time_limit);
944  my $stable_ids = $self->get_unique_stable_ids;
945 
946  # for performance reasons, additionally store coordinates in a lookup hash
947  foreach my $archive_id (@{ $self->get_all_ArchiveStableIds }) {
948 
949  # coordinates are positions in the sorted lists
950  my $x = $self->_index_of($archive_id->db_name, $db_names);
951  my $y = $self->_index_of($archive_id->stable_id, $stable_ids);
952 
953  $self->{'sorted_tree'}->{'coords'}->{$self->_node_id($archive_id)} =
954  [ $x, $y ];
955  }
956 }
957 
958 #
959 # Description : Returns the index of an element in an array
960 # Example : my @array = (a, b, c);
961 # my $i = _index_of('b', \@array); # will return 1
962 # Return type : Int (or undef if element is not found in array)
963 #
964 sub _index_of {
965  my ($self, $element, $arrayref) = @_;
966 
967  throw("Expecting arrayref argument.") unless (ref($arrayref) eq 'ARRAY');
968 
969  my @array = @$arrayref;
970 
971  while (my $e = pop(@array)) {
972  return scalar(@array) if ($e eq $element);
973  }
974 
975  return undef;
976 }
977 
978 
979 =head2 consolidate_tree
980 
981  Example : $history->consolidate_tree;
982  Description : Consolidate the history tree. This means removing nodes where
983  there wasn't a change and bridging gaps in the history. The end
984  result will be a sparse tree which only contains the necessary
985  information.
986  Return type : none
987  Exceptions : none
988  Caller : ArchiveStableIdAdaptor->fetch_history_tree_by_stable_id
989  Status : At Risk
990  : under development
991 
992 =cut
993 
994 sub consolidate_tree {
995  my $self = shift;
996 
997  #
998  # get all self-events and creations/deletions and sort them (by stable ID and
999  # chronologically)
1000  #
1001  my @event_lookup;
1002 
1003  foreach my $event (@{ $self->get_all_StableIdEvents }) {
1004 
1005  my $old_id = $event->old_ArchiveStableId;
1006  my $new_id = $event->new_ArchiveStableId;
1007 
1008  if (!$old_id or !$new_id or ($old_id->stable_id eq $new_id->stable_id)) {
1009  if ($old_id) {
1010  push @event_lookup, [$old_id->stable_id, $old_id->release,
1011  $old_id->db_name, $event];
1012  } else {
1013  push @event_lookup, [$new_id->stable_id, $new_id->release - 1,
1014  $new_id->db_name, $event];
1015  }
1016  }
1017  }
1018 
1019  my @self_events = map { $_->[3] }
1020  sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] || $a->[2] cmp $b->[2] }
1021  @event_lookup;
1022 
1023  #
1024  # consolidate tree
1025  #
1026  my $last = shift(@self_events);
1027 
1028  while (my $event = shift(@self_events)) {
1029 
1030  my $lo = $last->old_ArchiveStableId;
1031  my $ln = $last->new_ArchiveStableId;
1032  my $eo = $event->old_ArchiveStableId;
1033  my $en = $event->new_ArchiveStableId;
1034 
1035  if ($lo and $eo and $en and $lo->stable_id eq $eo->stable_id
1036  and $lo->version eq $eo->version) {
1037 
1038  # this removes redundant nodes and connects the remaining nodes:
1039  #
1040  # o--o--o -> o-----o
1041  # 1 1 1 1 1
1042 
1043  #warn 'A: '.$last->ident_string.' | '.$event->ident_string."\n";
1044 
1045  $self->remove_StableIdEvent($last);
1046  $self->remove_StableIdEvent($event);
1047 
1048  $event->old_ArchiveStableId($lo);
1049 
1050  $self->add_StableIdEvents($event);
1051 
1052  } elsif ($ln and $eo and $ln->db_name ne $eo->db_name
1053  and $ln->stable_id eq $eo->stable_id and $ln->version eq $eo->version) {
1054 
1055  # try to brigde gaps
1056 
1057  if ($en) {
1058 
1059  # o--o o--o -> o--o-----o
1060  # 1 2 2 2 1 2 2
1061  #
1062  # o o--o -> o-----o
1063  # 1 1 1 1 1
1064 
1065  #warn 'X: '.$last->ident_string.' | '.$event->ident_string."\n";
1066 
1067  $self->remove_StableIdEvent($event);
1068  $event->old_ArchiveStableId($ln);
1069  $self->add_StableIdEvents($event);
1070 
1071  } elsif ($lo) {
1072 
1073  # there's a deletion event, deal with it differently
1074 
1075  if ($lo->version eq $ln->version) {
1076 
1077  # o--o o -> o-----o
1078  # 1 1 1 1 1
1079 
1080  #warn 'Y: '.$last->ident_string.' | '.$event->ident_string."\n";
1081 
1082  $self->remove_StableIdEvent($last);
1083  $last->new_ArchiveStableId($eo);
1084  $self->add_StableIdEvents($last);
1085 
1086  } else {
1087 
1088  # o--o o -> o--o--o
1089  # 1 2 2 1 2 2
1090 
1091  #warn 'Z: '.$last->ident_string.' | '.$event->ident_string."\n";
1092 
1093  $self->remove_StableIdEvent($event);
1094  $event->old_ArchiveStableId($ln);
1095  $event->new_ArchiveStableId($eo);
1096  $self->add_StableIdEvents($event);
1097 
1098  }
1099 
1100  } else {
1101 
1102  # creation followed by deletion in next mapping
1103  #
1104  # o o -> o--o
1105  # 1 1 1 1
1106 
1107  #warn 'Q: '.$last->ident_string.' | '.$event->ident_string."\n";
1108 
1109  $self->remove_StableIdEvent($last);
1110  $self->remove_StableIdEvent($event);
1111  $event->old_ArchiveStableId($ln);
1112  $event->new_ArchiveStableId($eo);
1113  $self->add_StableIdEvents($event);
1114 
1115  }
1116 
1117  } else {
1118  #warn 'C: '.$last->ident_string.' | '.$event->ident_string."\n";
1119  }
1120 
1121  $last = $event;
1122  }
1123 
1124  # now add ArchiveStableIds of the remaining events to the tree
1125  $self->add_ArchiveStableIds_for_events;
1126 }
1127 
1128 
1129 =head2 reset_tree
1130 
1131  Example : $history->reset_tree;
1132  Description : Resets all pre-calculated tree grid data. Mostly used internally
1133  by methods that modify the tree.
1134  Return type : none
1135  Exceptions : none
1136  Caller : internal
1137  Status : At Risk
1138  : under development
1139 
1140 =cut
1141 
1142 sub reset_tree {
1143  my $self = shift;
1144  $self->{'sorted_tree'} = undef;
1145 }
1146 
1147 
1148 =head2 current_dbname
1149 
1150  Arg[1] : (optional) String $dbname - the dbname to set
1151  Example : my $dbname = $history->current_dbname;
1152  Description : Getter/setter for current dbname.
1153  Return type : String
1154  Exceptions : none
1155  Caller : general
1156  Status : At Risk
1157  : under development
1158 
1159 =cut
1160 
1161 sub current_dbname {
1162  my $self = shift;
1163  $self->{'current_dbname'} = shift if (@_);
1164  return $self->{'current_dbname'};
1165 }
1166 
1167 
1168 =head2 current_release
1169 
1170  Arg[1] : (optional) Int $release - the release to set
1171  Example : my $release = $history->current_release;
1172  Description : Getter/setter for current release.
1173  Return type : Int
1174  Exceptions : none
1175  Caller : general
1176  Status : At Risk
1177  : under development
1178 
1179 =cut
1180 
1181 sub current_release {
1182  my $self = shift;
1183  $self->{'current_release'} = shift if (@_);
1184  return $self->{'current_release'};
1185 }
1186 
1187 
1188 =head2 current_assembly
1189 
1190  Arg[1] : (optional) String $assembly - the assembly to set
1191  Example : my $assembly = $history->current_assembly;
1192  Description : Getter/setter for current assembly.
1193  Return type : String
1194  Exceptions : none
1195  Caller : general
1196  Status : At Risk
1197  : under development
1198 
1199 =cut
1200 
1201 sub current_assembly {
1202  my $self = shift;
1203  $self->{'current_assembly'} = shift if (@_);
1204  return $self->{'current_assembly'};
1205 }
1206 
1207 
1208 =head2 is_incomplete
1209 
1210  Arg[1] : (optional) Boolean $incomplete
1211  Example : if ($history->is_incomplete) {
1212  print "Returned tree is incomplete due to too many mappings
1213  in the database.\n";
1214  }
1215  Description : Getter/setter for incomplete flag. This is used by
1216  ArchiveStableIdAdaptor to indicate that it finished building
1217  the tree prematurely due to too many mappins in the db and can
1218  be used by applications to print warning messages.
1219  Return type : Boolean
1220  Exceptions : none
1221  Caller : general
1222  Status : At Risk
1223  : under development
1224 
1225 =cut
1226 
1227 sub is_incomplete {
1228  my $self = shift;
1229  $self->{'incomplete'} = shift if (@_);
1230  return $self->{'incomplete'};
1231 }
1232 
1233 
1234 1;
1235 
Bio::EnsEMBL::ArchiveStableId
Definition: ArchiveStableId.pm:29
map
public map()
Bio::EnsEMBL::StableIdHistoryTree
Definition: StableIdHistoryTree.pm:73
Bio::EnsEMBL::ArchiveStableId::stable_id
public stable_id()
Bio::EnsEMBL::StableIdEvent
Definition: StableIdEvent.pm:36
Bio::EnsEMBL::DBSQL::ArchiveStableIdAdaptor
Definition: ArchiveStableIdAdaptor.pm:63
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68