ensembl-hive  2.8.1
batch_id_history.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
18 =head1 NAME
19 
20 batch_id_history.pl - find stable IDs in the archive
21 
22 =head1 SYNOPSIS
23 
24 batch_id_history.pl [arguments]
25 
26 Required arguments:
27 
28  --host=HOST database host HOST
29  --port=PORT database port PORT
30  --user=USER database username USER
31  --dbname=NAME database name NAME
32  --stable_id_file=FILE read stable ID list from FILE
33 
34 Optional arguments:
35 
36  --pass=PASS database password PASS
37  --outfile=FILE write output to FILE
38  --pep_seq print peptide sequence
39 
40 
41 =head1 DESCRIPTION
42 
43 This script reads a list of stable IDs from a file and sees if it can find them
44 in the stable ID archive. It will print the ID history for each of them and
45 optionally the peptide sequence found there as well. Note that this will not
46 print the full history network, but rather branch out from your focus stable ID
47 only. If you are interested in the full network, have a look at
48 Bio::EnsEMBL::StableIdHistoryTree and related modules.
49 
50 
51 =head1 AUTHOR
52 
53 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
54 
55 =head1 CONTACT
56 
57 Please post comments/questions to the Ensembl development list
58 <http://lists.ensembl.org/mailman/listinfo/dev>
59 
60 =cut
61 
62 use strict;
63 use warnings;
64 no warnings 'uninitialized';
65 
66 use Getopt::Long;
70 
# Turn on autoflush for the currently selected output handle (STDOUT unless
# something selected another one) so output is not held back in a buffer.
$| = 1;

# Command-line settings; each stays undef until GetOptions assigns it.
# NOTE(review): --pep_seq is accepted here, but nothing in the visible script
# reads $pep_seq afterwards -- confirm whether peptide output was intended.
my ($host, $port, $user, $pass, $dbname, $stable_id_file, $outfile, $pep_seq);

# Parse the command line. GetOptions returns false when it encounters an
# unknown option or a value of the wrong type (e.g. a non-integer --port);
# abort in that case instead of continuing with a partly parsed configuration.
GetOptions(
    'host=s'           => \$host,
    'port=i'           => \$port,
    'user=s'           => \$user,
    'pass=s'           => \$pass,
    'dbname=s'         => \$dbname,
    'stable_id_file=s' => \$stable_id_file,
    'outfile=s'        => \$outfile,
    'pep_seq'          => \$pep_seq,
) or die "ERROR: Unable to run script.\nInvalid command-line arguments.\n";

# check required params (the password and the output options are optional)
unless ($host && $port && $user && $dbname && $stable_id_file) {
    die "ERROR: Unable to run script.\nNeed host, port, user, dbname and stable_id_file parameters.\n";
}
90 
91 # connect to database and get adaptors
93  -HOST => $host,
94  -PORT => $port,
95  -USER => $user,
96  -PASS => $pass,
97  -DBNAME => $dbname,
98 );
99 
# Adaptor used below to look up archived stable IDs.
my $aa = $db->get_ArchiveStableIdAdaptor();

# Input: the file holding the list of stable IDs.
my $infh;
open($infh, '<', $stable_id_file)
    or die("Can't open $stable_id_file for reading: $!");

# Output: STDOUT by default, or the file named with --outfile.
my $outfh;
if (!$outfile) {
    $outfh = \*STDOUT;
}
else {
    open($outfh, '>', $outfile)
        or die("Can't open $outfile for writing: $!");
}
114 
# Main loop: read one stable ID per input line, look it up in the archive and
# print its history as a matrix (one row per unique stable ID, one column per
# release, cells holding the version), followed by the stable IDs of the tree
# that are flagged as current.
while (my $sid = <$infh>) {

    # Skip comments and blank lines. The match must be bound to $sid
    # explicitly: the line is read into $sid, so $_ never contains it.
    next if ($sid =~ /^#/ or $sid =~ /^\s*$/);

    # Remove the line ending and any surrounding whitespace; this also
    # drops the "\r" of CRLF files, which a plain chomp would leave behind.
    $sid =~ s/^\s+|\s+$//g;
    print $outfh "\n$sid\n\n";

    my $archive_id = $aa->fetch_by_stable_id($sid);

    unless ($archive_id) {
        print $outfh " Not found in database.\n";
        next;
    }

    my $history = $archive_id->get_history_tree;
    next unless $history;

    if ($history->is_incomplete) {
        print $outfh " NOTE: History tree is incomplete.\n\n";
    }

    my $matrix = [];

    # get unique stable IDs (regardless of version); they label the rows
    my @unique_ids = @{ $history->get_unique_stable_ids };

    my $row_idx = 0;
    foreach my $id (@unique_ids) {
        $matrix->[$row_idx++]->[0] = $id;
    }

    # get all releases for which we have nodes in this graph; they go into
    # an extra row after the last stable ID row, starting at column 1
    my @releases = @{ $history->get_release_display_names };

    my $col_idx = 1;
    foreach my $release (@releases) {
        $matrix->[scalar(@unique_ids)]->[$col_idx++] = $release;
    }

    # print a "graphical" representation of the tree
    my $fmt   = " %-20s" . ("%-6s" x scalar(@releases)) . "\n";
    my $width = 1 + scalar(@releases);    # number of fields in $fmt

    # place each archived version at its coordinates; x is shifted by one
    # because column 0 holds the stable ID label
    foreach my $a_id (@{ $history->get_all_ArchiveStableIds }) {
        my ($x, $y) = @{ $history->coords_by_ArchiveStableId($a_id) };
        $matrix->[$y]->[$x+1] = $a_id->version;
    }

    foreach my $cells (@$matrix) {
        # Rows are sparse: turn unset cells into empty strings and pad each
        # row to the width of the format so printf has one argument per field.
        my @row = map { defined $_ ? $_ : '' } @{ $cells || [] };
        push @row, ('') x ($width - @row) if @row < $width;
        printf {$outfh} $fmt, @row;
    }

    # current versions in history
    print $outfh "\n Current stable IDs in this tree:\n";
    my @current = @{ $history->get_all_current_ArchiveStableIds };
    if (@current) {
        foreach my $current_id (@current) {
            print $outfh " " . $current_id->stable_id . "." . $current_id->version . "\n";
        }
    } else {
        print $outfh " none\n";
    }

}

close($infh);

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so check the result for the output handle.
close($outfh) or die("Error closing output: $!");
180 
181 
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
map
public map()
Bio::EnsEMBL::StableIdHistoryTree
Definition: StableIdHistoryTree.pm:73
archive
public archive()
Bio::EnsEMBL::Analysis
Definition: PairAlign.pm:3
Bio::EnsEMBL::SimpleFeature
Definition: SimpleFeature.pm:31