2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 # Figure out which seq_regions are non-redundant and set the appropriate
18 # attribute in seq_region_attrib
29 my ($host, $port, $user, $password, $db, $verbose, $check);
35 GetOptions (
'host=s' => \$host,
37 'password=s' => \$password,
40 'verbose' => \$verbose,
44 die
"Host must be specified" unless $host;
45 die
"Database must be specified" unless $db;
47 my $dbi = DBI->connect(
"dbi:mysql:host=$host;port=$port;database=$db",
"$user",
"$password", {
'RaiseError' => 1}) || die
"Can't connect to target DB";
49 # ----------------------------------------
51 # check that there is an entry in the attrib_type table for nonredundant
52 # if there is, cache the ID for later; if not, make one
53 my $nr_attrib_type_id;
54 my $sth = $dbi->prepare(
"SELECT attrib_type_id FROM attrib_type WHERE code='nonredundant'");
56 while ( my @row= $sth->fetchrow_array()) {
57 $nr_attrib_type_id = $row[0];
61 if ($nr_attrib_type_id) {
63 debug(
"Attribute with name nonredundant already set in attrib_type with attrib_type_id " . $nr_attrib_type_id);
67 debug(
"Attribute with name nonredundant not set in attrib_type; adding ...");
69 $sth = $dbi->prepare(
"INSERT INTO attrib_type (code, name) VALUES ('nonredundant', 'Non-redundant sequence region')");
72 $nr_attrib_type_id = $sth->{mysql_insertid};
74 debug(
"Added nonredundant attribute with ID " . $nr_attrib_type_id);
78 # ----------------------------------------
87 my $slice_adaptor = $db_adaptor->get_SliceAdaptor();
89 # Assume all entries in the top-level co-ordinate system are non-redundant
90 my @toplevel = @{$slice_adaptor->fetch_all(
'toplevel')};
91 debug(
"Got " . @toplevel .
" sequence regions in the top-level co-ordinate system");
94 # Rest of the co-ordinate systems, in "ascending" order
97 debug(
"Starting pair-wise co-ordinate system comparison");
99 my @nr_slices; # will store non-redundant ones
for later
101 for (my $lower_cs_idx = 0; $lower_cs_idx < @coord_systems; $lower_cs_idx++) {
102 for (my $higher_cs_idx = $lower_cs_idx+1; $higher_cs_idx < @coord_systems; $higher_cs_idx++) {
104 my $higher_cs = $coord_systems[$higher_cs_idx];
105 my $lower_cs = $coord_systems[$lower_cs_idx];
107 debug(
"$lower_cs:$higher_cs");
109 # we are interested in the slices that do *not* project onto the "higher" coordinate system
110 my @slices = @{$slice_adaptor->fetch_all($lower_cs)};
112 my $projected_hit = 0;
113 my $projected_miss = 0;
115 foreach my $slice (@slices) {
116 my @projected = @{$slice->project($higher_cs)};
117 if (@projected > 0) {
121 push @nr_slices, $slice;
125 debug (
"Projecting " . $lower_cs .
" onto " . $higher_cs .
": " .
126 $projected_hit .
" hit, " . $projected_miss .
" miss out of " . @slices .
" total");
135 #----------------------------------------
142 # ----------------------------------------
143 # Get all the co-ordinate systems in order
144 # Order is descending average length
145 # Return an array of co-ordinate system names
149 my $db_adaptor = shift;
150 my $slice_adaptor = shift;
152 debug(
"Ordering co-ordinate systems by average length");
154 my $cs_adaptor = $db_adaptor->get_CoordSystemAdaptor();
155 my @coord_system_objs = @{$cs_adaptor->fetch_all()};
157 # Calculate average lengths
159 foreach my $cs (@coord_system_objs) {
160 my @slices = @{$slice_adaptor->fetch_all($cs->name())};
162 foreach my $slice (@slices) {
163 $total_len += $slice->length();
165 if ($total_len > 0) {
166 $lengths{$cs->name()} = $total_len / scalar(@slices);
168 $lengths{$cs->name()} = 0;
169 print
"Warning - total length for " . $cs->name() .
" is zero!\n";
173 my @coord_systems = sort { $lengths{$a} <=> $lengths{$b} } keys %lengths;
175 foreach my $cs_name (@coord_systems) {
176 debug(
"Co-ord system: " . $cs_name .
" Average length: " .
int $lengths{$cs_name});
179 debug(
"Got co-ordinate systems in order: " . join(
', ', @coord_systems));
181 return @coord_systems;
185 # ----------------------------------------------------------------------
186 # Misc / utility functions
190 print
"Usage: perl set_non_redundant_attribs.pl {options}\n";
191 print
"Where options are:\n";
192 print
" --host {hostname} The database host.\n";
193 print
" --user {username} The database user. Must have write permissions\n";
194 print
" --password {pass} The password for user, if required.\n";
195 print
" --port {folder} The database port to use.\n";
196 print
" --db {schema} The name of the database\n";
197 print
" --check Read back non-redundant slices from SliceAdaptor\n";
198 print
" --verbose Print extra output information\n";
202 # ----------------------------------------------------------------------
208 print $str .
"\n" if $verbose;
212 # ----------------------------------------------------------------------
214 # Set the "nonredundant" attribute on a Slice or group of Slices
215 # arg 1: SliceAdaptor
216 # arg 2: internal ID of 'nonredundant' attrib_type
217 # arg 3: DB connection
222 my ($slice_adaptor, $nr_attrib_type_id, $dbi, @targets) = @_;
224 debug(
"Setting nonredundant attribute on " . @targets .
" sequence regions");
226 my $sth = $dbi->prepare(
"INSERT INTO seq_region_attrib (seq_region_id, attrib_type_id) VALUES (?,?)");
228 foreach my $slice (@targets) {
230 my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
232 $sth->execute($seq_region_id, $nr_attrib_type_id);
240 # ----------------------------------------------------------------------
244 my $slice_adaptor = shift;
246 my @all = @{$slice_adaptor->fetch_all_non_redundant()};
248 print
"Got " . @all .
" non-redundant seq_regions from SliceAdaptor\n";