3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
38 # create a new ScoredMappingMatrix
40 -DUMP_PATH => $dump_path,
41 -CACHE_FILE =>
'gene_scores.ser',
48 $gene_scores->write_to_file;
50 # later, read these gene_scores from file
52 -DUMP_PATH => $dump_path,
53 -CACHE_FILE =>
'gene_scores.ser',
59 This
object represents a collection of scores between source and target
61 has methods to retrieve individual or all Entries, as well as derived
62 data like number of unique sources or targets, or various counts and
65 It is the
main collection
for dealing with scored relationships in the
66 stable Id mapping application.
80 get_targets_for_source
81 get_Entries_for_source
82 get_sources_for_target
83 get_Entries_for_target
99 package Bio::EnsEMBL::IdMapping::ScoredMappingMatrix;
103 no warnings
'uninitialized';
116 Arg[1-N] : see superclass
118 -DUMP_PATH => $dump_path,
119 -CACHE_FILE =>
'gene_scores.ser',
121 Description : Constructor.
132 my $class = ref($caller) || $caller;
133 my $self = $class->SUPER::new(@_);
135 # initialise internal datastructure
136 unless ($self->loaded) {
137 $self->{
'cache'}->{
'matrix'} = {};
138 $self->{
'cache'}->{
'source_list'} = {};
139 $self->{
'cache'}->{
'target_list'} = {};
148 Example : $gene_scores->
flush;
149 Description : Flushes (empties) the scoring matrix.
162 $self->{
'cache'}->{
'matrix'} = {};
163 $self->{
'cache'}->{
'source_list'} = {};
164 $self->{
'cache'}->{
'target_list'} = {};
170 Arg[1] : Int $start - start index (inclusive)
171 Arg[2] : Int $end - end index (inclusive)
172 Example : # get the first 1000 elements in the matrix
173 my $sub_matrix = $gene_scores->sub_matrix(1, 1000);
174 Description : Returns a sub-matrix of the ScoredMappingMatrix. The arguments
175 ($start and $end) specify the position of the first and last
176 element to
return (inclusive, counting starts with element 1,
191 # default to returning the full matrix if no start/end provided
193 $end ||= $self->
size;
196 -DUMP_PATH => $self->dump_path,
197 -CACHE_FILE => $self->cache_file_name,
201 foreach my $key (sort keys %{ $self->{
'cache'}->{
'matrix'} }) {
203 next
if ($i < $start);
206 my ($source, $target) = split(/:/, $key);
207 $sub_matrix->add_score($source, $target,
208 $self->{
'cache'}->{
'matrix'}->{$key});
218 Example : $gene_scores->add_Entry($entry);
219 Description : Adds an Entry to the scoring matrix.
220 Return type : Float - the Entry's score
221 Exceptions : thrown on wrong or missing argument
233 throw("Need a Bio::EnsEMBL::IdMapping::Entry");
236 return $self->add_score($entry->source, $entry->target, $entry->score);
242 Arg[1] : Bio::EnsEMBL::IdMapping::Entry $entry - Entry to update
243 Example : $gene_scores->update_Entry($entry);
244 Description : Updates an Entry (or rather its score) in the scoring matrix.
245 Actually delegates to add_Entry(), only there as an intuitively
247 Return type : Float - the Entry's score
248 Exceptions : thrown on wrong or missing argument
256 return $_[0]->add_Entry($_[1]);
261 # not needed in the current application, so not implemented
264 warning(
'Method ScoredMappingMatrix->remove_Entry not implemented (yet).');
270 Arg[1] : Int $source - source
object's internal Id ("dbID")
271 Arg[2] : Int $target - target object's
internal Id (
"dbID")
272 Arg[3] : Float $score - score for source/target pair
273 Example : $gene_scores->add_score(1234, 5678, 0.997);
274 Description : Adds a score for a source/target pair to the scoring matrix.
275 This is a low-level version of add_Entry().
276 Return type : Float - the score
290 # make sure you don't put duplicates on the source and target lists
291 unless (exists($self->{
'cache'}->{
'matrix'}->{
"$source:$target"})) {
292 push @{ $self->{
'cache'}->{
'source_list'}->{$source} }, $target;
293 push @{ $self->{
'cache'}->{
'target_list'}->{$target} }, $source;
296 $self->{
'cache'}->{
'matrix'}->{
"$source:$target"} = $score;
302 Arg[1] : Int $source - source
object's internal Id ("dbID")
303 Arg[2] : Int $target - target object's
internal Id (
"dbID")
304 Arg[3] : Float $score - score for source/target pair
305 Example : $gene_scores->set_score(1234, 5678, 0.997);
306 Description : Sets the score for a source/target pair in the scoring matrix.
307 This method is similar to add_score, but assumes that the Entry
308 has been added before, so won't update the sources and target
310 Return type : Float - the score
324 $self->{
'cache'}->{
'matrix'}->{
"$source:$target"} = $score;
330 Arg[1] : Int $source - source
object's internal Id ("dbID")
331 Arg[2] : Int $target - target object's
internal Id (
"dbID")
332 Example : my $entry = $gene_scores->get_Entry($source_gene->
id,
334 Description : Gets an Entry from the scoring matrix for a given source and
336 Return type :
Bio::
EnsEMBL::IdMapping::Entry or undef
349 if (exists($self->{
'cache'}->{
'matrix'}->{
"$source:$target"})) {
351 [$source, $target, $self->{
'cache'}->{
'matrix'}->{
"$source:$target"}]
361 Arg[1] : Int $source - source
object's internal Id ("dbID")
362 Arg[2] : Int $target - target object's
internal Id (
"dbID")
363 Example : my $score = $gene_scores->get_score($source_gene->
id,
365 Description : Gets the score from the scoring matrix for a given source and
367 Return type : Float or undef
381 if (exists($self->{
'cache'}->{
'matrix'}->{
"$source:$target"})) {
382 return $self->{
'cache'}->{
'matrix'}->{
"$source:$target"};
389 =head2 get_targets_for_source
391 Arg[1] : Int $source - source
object's internal Id ("dbID")
392 Example : my @targets = @{ $gene_scores->get_targets_for_source(1234) };
393 Description : Returns a list of all targets which have a score against a given
395 Return type : Arrayref of Int (target objects' internal Ids)
403 sub get_targets_for_source {
407 return $self->{
'cache'}->{
'source_list'}->{$source} || [];
411 =head2 get_Entries_for_source
413 Arg[1] : Int $source - source
object's internal Id ("dbID")
414 Example : my @entries = @{ $gene_scores->get_Entries_for_source(1234) };
415 Description : Returns a list of all Entries in the scoring matrix for a given
417 Return type : Arrayref of Bio::EnsEMBL::IdMapping::Entry objects
425 sub get_Entries_for_source {
429 return [ map { $self->get_Entry($source, $_) }
430 @{ $self->{'cache
'}->{'source_list
'}->{$source} || [] } ];
434 =head2 get_sources_for_target
436 Arg[1] : Int $target - target object's
internal Id (
"dbID")
437 Example : my @sources = @{ $gene_scores->get_sources_for_target(5678) };
438 Description : Returns a list of all sources which have a score against a given
440 Return type : Arrayref of Int (source objects
' internal Ids)
448 sub get_sources_for_target {
452 return $self->{'cache
'}->{'target_list
'}->{$target} || [];
456 =head2 get_Entries_for_target
458 Arg[1] : Int $target - target object's
internal Id (
"dbID")
459 Example : my @entries = @{ $gene_scores->get_Entries_for_target(5678) };
460 Description : Returns a list of all Entries in the scoring matrix
for a given
470 sub get_Entries_for_target {
474 return [
map { $self->get_Entry($_, $target) }
475 @{ $self->{
'cache'}->{
'target_list'}->{$target} || [] } ];
479 =head2 get_all_Entries
481 Example :
foreach my $entry (@{ $gene_scores->get_all_Entries }) {
482 # do something with the entry
484 Description : Returns a list of all Entries in the scoring matrix.
493 sub get_all_Entries {
498 foreach my $key (keys %{ $self->{
'cache'}->{
'matrix'} }) {
499 my ($source, $target) = split(/:/, $key);
501 [$source, $target, $self->{
'cache'}->{
'matrix'}->{$key}]
509 =head2 get_all_sources
511 Example : my @sources = @{ $gene_scores->get_all_sources };
512 Description : Returns a list of all sources in the scoring matrix.
513 Return type : Arrayref of Int (source objects
' internal Ids)
521 sub get_all_sources {
523 return [keys %{ $self->{'cache
'}->{'source_list
'} }];
527 =head2 get_all_targets
529 Example : my @targets = @{ $gene_scores->get_all_targets };
530 Description : Returns a list of all targets in the scoring matrix.
531 Return type : Arrayref of Int (target objects' internal Ids)
539 sub get_all_targets {
541 return [keys %{ $self->{
'cache'}->{
'target_list'} }];
545 =head2 get_entry_count
547 Example : my $num_entries = $gene_scores->get_entry_count;
548 Description : Returns the number of Entries in the scoring matrix.
557 sub get_entry_count {
559 return scalar(keys %{ $self->{
'cache'}->{
'matrix'} });
565 Example : my $size = $gene_scores->size;
566 Description : Returns the size of the scoring matrix. Same value as returned
567 by get_entry_count().
577 return $_[0]->get_entry_count;
581 =head2 get_source_count
583 Example : my $num_sources = $gene_scores->get_source_count;
584 Description : Returns the number of distinct sources in the scoring matrix.
593 sub get_source_count {
595 return scalar(keys %{ $self->{
'cache'}->{
'source_list'} });
599 =head2 get_target_count
601 Example : my $num_targets = $gene_scores->get_target_count;
602 Description : Returns the number of distinct targets in the scoring matrix.
611 sub get_target_count {
613 return scalar(keys %{ $self->{
'cache'}->{
'target_list'} });
617 =head2 get_min_max_scores
619 Example : my ($min_score, $max_score) =
620 @{ $gene_scores->get_min_max_scores };
621 Description : Returns the minimum and maximum score in the scoring matrix.
622 Return type : Arrayref of Float [min_score, max_score]
630 sub get_min_max_scores {
633 my @keys = keys %{ $self->{
'cache'}->{
'matrix'} };
635 return [undef, undef] unless (@keys);
637 # initialise; this should make loop quicker
638 my $min = $self->{
'cache'}->{
'matrix'}->{$keys[0]};
639 my $max = $self->{
'cache'}->{
'matrix'}->{$keys[0]};
641 foreach my $key (@keys) {
642 $min = $self->{
'cache'}->{
'matrix'}->{$key}
if ($min > $self->{
'cache'}->{
'matrix'}->{$key});
643 $max = $self->{
'cache'}->{
'matrix'}->{$key}
if ($max < $self->{
'cache'}->{
'matrix'}->{$key});
650 =head2 get_average_score
652 Example : my $avg_score = $gene_scores->get_average_score;
653 Description : Returns the average (mean) score in the matrix.
662 sub get_average_score {
665 my @keys = keys %{ $self->{
'cache'}->{
'matrix'} };
667 return undef unless (@keys);
671 foreach my $key (@keys) {
672 $total += $self->{
'cache'}->{
'matrix'}->{$key};
675 return $total/scalar(@keys);
683 Example : my $update_count = $gene_scores->
merge($more_gene_scores);
684 Description : Merges two scoring matrices. If there's an Entry for a
685 source/target pair in both matrices, the higher score will be
687 Return type : Int - number of Entries added or updated
688 Exceptions : thrown on wrong or missing argument
710 foreach my $key ( keys %{ $matrix->{'cache
'}->{'matrix
'} } ) {
711 if ( !defined( $self->{'cache
'}->{'matrix
'}->{$key} )
712 or ( $self->{'cache
'}->{'matrix
'}->{$key} <
713 $matrix->{'cache
'}->{'matrix
'}->{$key} ) )
715 $self->{'cache
'}->{'matrix
'}->{$key} =
716 $matrix->{'cache
'}->{'matrix
'}->{$key};
721 # merge sources and target lists
722 foreach my $key ( keys %{ $matrix->{'cache
'}->{'source_list
'} } ) {
723 if ( defined( $self->{'cache
'}->{'source_list
'}->{$key} ) ) {
724 # need to merge lists
726 map { $_ => 1 } @{ $self->get_targets_for_source($key) };
727 map { $unique{$_} = 1 }
728 @{ $matrix->get_targets_for_source($key) };
729 $self->{'cache
'}->{'source_list
'}->{$key} = [ keys %unique ];
732 $self->{'cache
'}->{'source_list
'}->{$key} =
733 $matrix->{'cache
'}->{'source_list
'}->{$key};
737 foreach my $key ( keys %{ $matrix->{'cache
'}->{'target_list
'} } ) {
738 if ( defined( $self->{'cache
'}->{'target_list
'}->{$key} ) ) {
739 # need to merge lists
741 map { $_ => 1 } @{ $self->get_sources_for_target($key) };
742 map { $unique{$_} = 1 }
743 @{ $matrix->get_sources_for_target($key) };
744 $self->{'cache
'}->{'target_list
'}->{$key} = [ keys %unique ];
747 $self->{'cache
'}->{'target_list
'}->{$key} =
748 $matrix->{'cache
'}->{'target_list
'}->{$key};
758 Arg[1] : String $type - object type (e.g. 'gene
')
759 Arg[2] : String $dump_path - path for writing output
760 Example : $gene_scores->log('gene
', $conf->param('basedir
'));
761 Description : Logs all Entries in the scoring matrix to a file. Used for
764 Exceptions : thrown on I/O error
774 my $dump_path = shift;
776 my $debug_path = path_append($dump_path, 'debug');
777 my $logfile = "$debug_path/${type}_scores.txt";
779 open(my $fh, '>
', $logfile) or
780 throw("Unable to open $logfile for writing: $!");
782 foreach my $entry (@{ $self->get_all_Entries }) {
783 print $fh ($entry->to_string."\n");
792 Example : print LOG $gene_scores->to_string, "\n";
793 Description : Returns a string representation of the scoring matrix. This is
794 simply a multi-line string, where each line is a stringified
796 Useful for debugging and logging.
810 foreach my $entry (@{ $self->get_all_Entries }) {
811 $string .= $entry->to_string."\n";