ensembl-hive  2.8.1
set_nonredundant_attribs.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 # Figure out which seq_regions are non-redundant and set the appropriate
18 # attribute in seq_region_attrib
19 
20 use strict;
21 use warnings;
22 
23 use DBI;
24 use Getopt::Long;
25 
28 
29 my ($host, $port, $user, $password, $db, $verbose, $check);
30 $host = "127.0.0.1";
31 $port = 3306;
32 $password = "";
33 $user = "ensro";
34 
35 GetOptions ('host=s' => \$host,
36  'user=s' => \$user,
37  'password=s' => \$password,
38  'port=s' => \$port,
39  'db=s' => \$db,
40  'verbose' => \$verbose,
41  'check' => \$check,
42  'help' => sub { &show_help(); exit 1;} );
43 
44 die "Host must be specified" unless $host;
45 die "Database must be specified" unless $db;
46 
47 my $dbi = DBI->connect("dbi:mysql:host=$host;port=$port;database=$db", "$user", "$password", {'RaiseError' => 1}) || die "Can't connect to target DB";
48 
49 # ----------------------------------------
50 
51 # check that there is an entry in the attrib_type table for nonredundant
52 # if there is, cache the ID for later; if not, make one
53 my $nr_attrib_type_id;
54 my $sth = $dbi->prepare("SELECT attrib_type_id FROM attrib_type WHERE code='nonredundant'");
55 $sth->execute();
56 while ( my @row= $sth->fetchrow_array()) {
57  $nr_attrib_type_id = $row[0];
58 }
59 $sth->finish();
60 
61 if ($nr_attrib_type_id) {
62 
63  debug("Attribute with name nonredundant already set in attrib_type with attrib_type_id " . $nr_attrib_type_id);
64 
65 } else {
66 
67  debug("Attribute with name nonredundant not set in attrib_type; adding ...");
68 
69  $sth = $dbi->prepare("INSERT INTO attrib_type (code, name) VALUES ('nonredundant', 'Non-redundant sequence region')");
70  $sth->execute();
71 
72  $nr_attrib_type_id = $sth->{mysql_insertid};
73 
74  debug("Added nonredundant attribute with ID " . $nr_attrib_type_id);
75 
76 }
77 
78 # ----------------------------------------
79 
80 my $db_adaptor = new Bio::EnsEMBL::DBSQL::DBAdaptor(-user => $user,
81  -dbname => $db,
82  -host => $host,
83  -port => $port,
84  -pass => $password,
85  -driver => 'mysql' );
86 
87 my $slice_adaptor = $db_adaptor->get_SliceAdaptor();
88 
89 # Assume all entries in the top-level co-ordinate system are non-redundant
90 my @toplevel = @{$slice_adaptor->fetch_all('toplevel')};
91 debug("Got " . @toplevel . " sequence regions in the top-level co-ordinate system");
92 set_nr_attribute($slice_adaptor, $nr_attrib_type_id, $dbi, @toplevel);
93 
94 # Rest of the co-ordinate systems, in "ascending" order
95 my @coord_systems = get_coord_systems_in_order($db_adaptor, $slice_adaptor);
96 
97 debug("Starting pair-wise co-ordinate system comparison");
98 
99 my @nr_slices; # will store non-redundant ones for later
100 
101 for (my $lower_cs_idx = 0; $lower_cs_idx < @coord_systems; $lower_cs_idx++) {
102  for (my $higher_cs_idx = $lower_cs_idx+1; $higher_cs_idx < @coord_systems; $higher_cs_idx++) {
103 
104  my $higher_cs = $coord_systems[$higher_cs_idx];
105  my $lower_cs = $coord_systems[$lower_cs_idx];
106 
107  debug("$lower_cs:$higher_cs");
108 
109  # we are interested in the slices that do *not* project onto the "higher" coordinate system
110  my @slices = @{$slice_adaptor->fetch_all($lower_cs)};
111 
112  my $projected_hit = 0;
113  my $projected_miss = 0;
114 
115  foreach my $slice (@slices) {
116  my @projected = @{$slice->project($higher_cs)};
117  if (@projected > 0) {
118  $projected_hit++;
119  } else {
120  $projected_miss++;
121  push @nr_slices, $slice;
122  }
123  undef @projected;
124  }
125  debug ("Projecting " . $lower_cs . " onto " . $higher_cs . ": " .
126  $projected_hit . " hit, " . $projected_miss . " miss out of " . @slices . " total");
127 
128  undef @slices;
129 
130  }
131 }
132 
133 set_nr_attribute($slice_adaptor, $nr_attrib_type_id, $dbi, @nr_slices);
134 
135 #----------------------------------------
136 
137 check_non_redundant($slice_adaptor) if $check;
138 
139 $sth->finish();
140 $dbi->disconnect();
141 
142 # ----------------------------------------
143 # Get all the co-ordinate systems in order
144 # Order is descending average length
145 # Return an array of co-ordinate system names
146 
148 
149  my $db_adaptor = shift;
150  my $slice_adaptor = shift;
151 
152  debug("Ordering co-ordinate systems by average length");
153 
154  my $cs_adaptor = $db_adaptor->get_CoordSystemAdaptor();
155  my @coord_system_objs = @{$cs_adaptor->fetch_all()};
156 
157  # Calculate average lengths
158  my %lengths;
159  foreach my $cs (@coord_system_objs) {
160  my @slices = @{$slice_adaptor->fetch_all($cs->name())};
161  my $total_len = 0;
162  foreach my $slice (@slices) {
163  $total_len += $slice->length();
164  }
165  if ($total_len > 0) {
166  $lengths{$cs->name()} = $total_len / scalar(@slices);
167  } else {
168  $lengths{$cs->name()} = 0;
169  print "Warning - total length for " . $cs->name() . " is zero!\n";
170  }
171  }
172 
173  my @coord_systems = sort { $lengths{$a} <=> $lengths{$b} } keys %lengths;
174 
175  foreach my $cs_name (@coord_systems) {
176  debug("Co-ord system: " . $cs_name . " Average length: " . int $lengths{$cs_name});
177  }
178 
179  debug("Got co-ordinate systems in order: " . join(', ', @coord_systems));
180 
181  return @coord_systems;
182 
183 }
184 
185 # ----------------------------------------------------------------------
186 # Misc / utility functions
187 
188 sub show_help {
189 
190  print "Usage: perl set_non_redundant_attribs.pl {options}\n";
191  print "Where options are:\n";
192  print " --host {hostname} The database host.\n";
193  print " --user {username} The database user. Must have write permissions\n";
194  print " --password {pass} The password for user, if required.\n";
195  print " --port {folder} The database port to use.\n";
196  print " --db {schema} The name of the database\n";
197  print " --check Read back non-redundant slices from SliceAdaptor\n";
198  print " --verbose Print extra output information\n";
199 
200 }
201 
202 # ----------------------------------------------------------------------
203 
204 sub debug {
205 
206  my $str = shift;
207 
208  print $str . "\n" if $verbose;
209 
210 }
211 
212 # ----------------------------------------------------------------------
213 
214 # Set the "nonredundant" attribute on a Slice or group of Slices
215 # arg 1: SliceAdaptor
216 # arg 2: internal ID of 'nonredundant' attrib_type
217 # arg 3: DB connection
218 # arg 3..n: Slices
219 
220 sub set_nr_attribute {
221 
222  my ($slice_adaptor, $nr_attrib_type_id, $dbi, @targets) = @_;
223 
224  debug("Setting nonredundant attribute on " . @targets . " sequence regions");
225 
226  my $sth = $dbi->prepare("INSERT INTO seq_region_attrib (seq_region_id, attrib_type_id) VALUES (?,?)");
227 
228  foreach my $slice (@targets) {
229 
230  my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
231 
232  $sth->execute($seq_region_id, $nr_attrib_type_id);
233 
234  }
235 
236  $sth->finish();
237 
238 }
239 
240 # ----------------------------------------------------------------------
241 
243 
244  my $slice_adaptor = shift;
245 
246  my @all = @{$slice_adaptor->fetch_all_non_redundant()};
247 
248  print "Got " . @all . " non-redundant seq_regions from SliceAdaptor\n";
249 
250 }
251 
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
set_nr_attribute
public set_nr_attribute()
Bio::EnsEMBL::DBSQL::SliceAdaptor
Definition: SliceAdaptor.pm:78
debug
public debug()
show_help
public show_help()
check_non_redundant
public check_non_redundant()
get_coord_systems_in_order
public get_coord_systems_in_order()