3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefMapper::ProcessPrioritys;
34 # Process the priority xrefs.
37 # 1) create a list of source "names" that are priority xrefs
39 # 2) Just to be sure set all ox_status in object_xref to 'DUMP_OUT'
40 # set dumped in xref to NULL
42 # 3) for each of the source names
43 # set ox_status to 'FAILED_PRIORITY' for those not the best match
44 # Also do this for its dependents
48 my($class, $mapper) = @_;
52 # $self->core($mapper->core);
53 $self->
xref($mapper->xref);
54 $self->verbose($mapper->verbose);
58 sub get_priority_names{
59 my ($self, $dbi) = @_;
62 my $psth = $dbi->prepare(
"select s.priority_description, s.name from source s, xref x where x.source_id = s.source_id group by s.priority_description, s.name order by s.name") || die
"prepare failed";
63 $psth->execute() || die
"execute failed";
68 my $last_name =
"rubbish";
70 $psth->bind_columns(\$desc,\$name);
71 while($psth->fetch()){
72 if($name eq $last_name and !defined($seen{$name})){
86 my $dbi = $self->xref->dbc;
87 my @names = $self->get_priority_names($dbi);
89 print
"The following will be processed as priority xrefs\n" if($self->verbose);
90 foreach my $name (@names){
91 print
"\t$name\n" if($self->verbose);
94 my $update_ox_sth = $dbi->prepare(
'update object_xref set ox_status = "FAILED_PRIORITY" where object_xref_id = ?');
95 my $update_x_sth = $dbi->prepare(
"update xref set dumped = 'NO_DUMP_ANOTHER_PRIORITY' where xref_id = ?");
97 # 1) Set to failed all those that have no object xrefs.
101 FROM source s, xref x
102 LEFT JOIN object_xref ox ON ox.xref_id = x.xref_id
103 WHERE x.source_id = s.source_id
105 AND ox.object_xref_id is
null
108 my $f_sth = $dbi->prepare($f_sql);
109 foreach my $name (@names){
110 $f_sth->execute($name);
112 $f_sth->bind_columns(\$xref_id);
113 while($f_sth->fetch()){
114 $update_x_sth->execute($xref_id);
121 # Now ALL object_xrefs have an identity_xref :-)
122 # So we can do a straight join and treat all info_types the same way.
124 my $new_sql =(<<NEWS);
125 SELECT ox.object_xref_id, x.accession, x.xref_id, (ix.query_identity + ix.target_identity) as identity, ox.ox_status, ox.ensembl_object_type, ensembl_id, info_type
126 FROM object_xref ox, xref x, source s, identity_xref ix
127 WHERE ox.object_xref_id = ix.object_xref_id
128 AND ox.xref_id = x.xref_id
129 AND s.source_id = x.source_id
131 ORDER BY x.accession DESC, s.priority ASC , identity DESC, x.xref_id DESC
133 my $new_sth = $dbi->prepare($new_sql);
135 # Query to copy identity_xref values from one xref to another
136 # This is to keep alignment information even if alignment was not the best match
139 my $idx_copy_sql = (<<IDXCP);
140 UPDATE identity_xref SET query_identity = ?, target_identity = ?, hit_start = ?, hit_end = ?, translation_start = ?, translation_end = ?, cigar_line = ?, score = ?, evalue = ?
141 WHERE object_xref_id = ?;
144 my $idx_copy_sth = $dbi->prepare($idx_copy_sql);
147 # Query to copy synonyms from one xref to another
150 my $syn_copy_sql = (<<SYNCP);
151 INSERT IGNORE INTO synonym (SELECT ?, synonym FROM synonym
155 my $syn_copy_sth = $dbi->prepare($syn_copy_sql);
157 my $best_ox_sth = $dbi->prepare(
"SELECT object_xref_id FROM object_xref WHERE xref_id = ? and ensembl_object_type = ? and ensembl_id = ?");
159 my $seq_score_sql = (<<SEQCP);
160 SELECT query_identity, target_identity, hit_start, hit_end, translation_start, translation_end, cigar_line, score, evalue
161 FROM identity_xref WHERE object_xref_id = ?
163 my $seq_score_sth = $dbi->prepare($seq_score_sql);
166 foreach my $name (@names){
167 $new_sth->execute($name);
168 my ($object_xref_id, $acc, $xref_id, $identity, $status, $object_type, $ensembl_id, $info_type);
169 $new_sth->bind_columns(\$object_xref_id, \$acc, \$xref_id, \$identity, \$status, \$object_type, \$ensembl_id, \$info_type);
172 my $best_xref_id = undef;
173 my @best_ensembl_id = undef;
174 my $last_xref_id = 0;
176 my @gone; # list of xref_ids that we
've already seen for this accession
177 while($new_sth->fetch){
178 if($last_acc eq $acc){
179 if($xref_id != $best_xref_id){
180 # We've already seen
this accession before, and
this xref_id is not the best one
182 $seen = ($xref_id == $last_xref_id);
184 $last_xref_id = $xref_id;
185 # If xref is a sequence_match, we want to copy the alignment identity_xref to prioritised mappings of the same ensembl_id
186 if ($info_type eq
'SEQUENCE_MATCH') {
187 my ($query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue, $best_object_xref_id);
188 $seq_score_sth->execute($object_xref_id);
189 $seq_score_sth->bind_columns(\$query_identity, \$target_identity, \$hit_start, \$hit_end, \$translation_start, \$translation_end, \$cigar_line, \$score, \$evalue);
190 $seq_score_sth->fetch();
191 $best_ox_sth->execute($best_xref_id, $object_type, $ensembl_id);
192 $best_ox_sth->bind_columns(\$best_object_xref_id);
193 $best_ox_sth->fetch();
194 $idx_copy_sth->execute($query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue, $best_object_xref_id);
196 # If the xref is marked DUMP_OUT, set it to FAILED_PRIORITY
197 if($status eq
"DUMP_OUT"){
198 $update_ox_sth->execute($object_xref_id);
199 ## If it is the first time processing this xref_id, also process dependents and update status
201 $update_x_sth->execute($xref_id);
202 # Copy synonyms across if they are missing
203 $syn_copy_sth->execute($best_xref_id, $xref_id);
204 $self->process_dependents($xref_id, $best_xref_id, $dbi);
208 $update_x_sth->execute($xref_id);
211 # Alignment did not pass, dismiss
212 if ($status eq
'FAILED_CUTOFF') {
215 ## There might be several mappings for the best priority
216 push @best_ensembl_id, $ensembl_id;
218 if(@gone){ #best priority failed so another one now found so set dumped;
219 if($last_name eq $acc){
220 foreach my $d (@gone){
221 $update_x_sth->execute($d);
227 if($status eq
"DUMP_OUT"){
229 $best_xref_id = $xref_id;
230 @best_ensembl_id = ($ensembl_id);
231 if(@gone and $last_name eq $acc){
232 foreach my $d (@gone){
233 $update_x_sth->execute($d);
238 else{ #
new xref_id not DUMP_OUT
239 if ($last_name ne $acc) { @gone = () } #
new accession
240 push @gone, $xref_id;
248 $update_ox_sth->finish;
249 $update_x_sth->finish;
250 $seq_score_sth->finish;
251 $best_ox_sth->finish;
252 $idx_copy_sth->finish;
253 $syn_copy_sth->finish;
255 my $sth = $dbi->prepare(
"insert into process_status (status, date) values('prioritys_flagged',now())");
# process_dependents($old_master_xref_id, $new_master_xref_id, $dbi)
#
# Walks the dependent xrefs hanging off a master xref, detaches their existing
# object_xref mappings (via _detach_object_xref) and re-creates dependent_xref,
# object_xref and identity_xref rows for them.
#
# Parameters (as unpacked from @_ below):
#   $self                - invocant; used here only to call _detach_object_xref
#   $old_master_xref_id  - seeds the @master_xrefs work list and is used to look
#                          up the ensembl objects the old master is mapped to
#   $new_master_xref_id  - used to look up the ensembl objects that new rows are
#                          attached to, and as master_xref_id in the inserts
#   $dbi                 - handle on which all statements are prepared
#
# NOTE(review): this excerpt appears to have lines missing. %ensembl_ids,
# %old_ensembl_ids and $recursive are used below without a visible declaration
# or assignment, and closing braces are not shown. The comments here describe
# only the statements that are visible.
260 sub process_dependents{
261 # master xref IDs are entries for the current accession via various methods. We take dependent xrefs from the old and add to the new
262 my ($self, $old_master_xref_id, $new_master_xref_id, $dbi) = @_;
# Statement handles, in order:
#   matching_ens  - (object type, ensembl id) pairs mapped to an xref, skipping FAILED_CUTOFF
#   dep           - dependents of a master xref together with the object type they map to
#   insert_dep_x  - new dependent_xref row
#   insert_dep_ox - new DEPENDENT / DUMP_OUT object_xref row (insert ignore)
#   dep_ox        - object_xref_id lookup for the row matching the insert above
#   insert_ix     - identity_xref row with both identities fixed at 100 (insert ignore)
265 my $matching_ens_sth = $dbi->prepare(
"select distinct ensembl_object_type, ensembl_id from object_xref where ox_status not in ('FAILED_CUTOFF') and xref_id = ? order by ensembl_object_type");
266 my $dep_sth = $dbi->prepare(
"select distinct dx.dependent_xref_id, dx.linkage_annotation, dx.linkage_source_id, ox.ensembl_object_type from dependent_xref dx, object_xref ox where ox.xref_id = dx.dependent_xref_id and ox.master_xref_id = dx.master_xref_id and dx.master_xref_id = ? order by ox.ensembl_object_type");
267 my $insert_dep_x_sth = $dbi->prepare(
"insert into dependent_xref(master_xref_id, dependent_xref_id, linkage_annotation, linkage_source_id) values(?, ?, ?, ?)");
268 my $insert_dep_ox_sth = $dbi->prepare(
"insert ignore into object_xref(master_xref_id, ensembl_object_type, ensembl_id, linkage_type, ox_status, xref_id) values(?, ?, ?, 'DEPENDENT', 'DUMP_OUT', ?)");
269 my $dep_ox_sth = $dbi->prepare(
"select object_xref_id from object_xref where master_xref_id = ? and ensembl_object_type = ? and ensembl_id = ? and linkage_type = 'DEPENDENT' AND ox_status = 'DUMP_OUT' and xref_id = ?");
270 my $insert_ix_sth = $dbi->prepare(
"insert ignore into identity_xref(object_xref_id, query_identity, target_identity) values(?, 100, 100)");
# Work list of master xref ids still to process. It starts with the old master
# and grows when chained dependents are pushed further down.
272 my @master_xrefs = ($old_master_xref_id);
275 my ($new_object_type, $new_ensembl_id, $old_object_type, $old_ensembl_id);
276 my ($dep_xref_id, $linkage_annotation, $new_object_xref_id, $linkage_source_id, $object_type);
279 # Create a hash of all possible mappings for this accession
# %ensembl_ids: object type => list of ensembl ids mapped to the new master.
281 $matching_ens_sth->execute($new_master_xref_id);
282 $matching_ens_sth->bind_columns(\$new_object_type, \$new_ensembl_id);
283 while ($matching_ens_sth->fetch()) {
284 push @{ $ensembl_ids{$new_object_type} }, $new_ensembl_id;
# %old_ensembl_ids: the same lookup, but for the old master.
287 $matching_ens_sth->execute($old_master_xref_id);
288 $matching_ens_sth->bind_columns(\$old_object_type, \$old_ensembl_id);
289 while ($matching_ens_sth->fetch()) {
290 push @{ $old_ensembl_ids{$old_object_type} }, $old_ensembl_id;
294 ## Loop through all dependent xrefs of old master xref, and recurse
# The loop ends once pop() yields a false value, i.e. when the work list is empty.
295 while(my $xref_id = pop(@master_xrefs)){
297 # Get dependent xrefs, be they gene, transcript or translation
298 $dep_sth->execute($xref_id);
299 $dep_sth->bind_columns(\$dep_xref_id, \$linkage_annotation, \$linkage_source_id, \$object_type);
# NOTE(review): the lines around this assignment are not visible in this
# excerpt. As shown it overwrites the caller-supplied new master with the xref
# being processed. Confirm whether it is guarded (e.g. only when $recursive is set).
301 $new_master_xref_id = $xref_id;
303 while($dep_sth->fetch()){
306 # Remove all mappings to low priority xrefs
307 # Then delete any leftover identity or go xrefs of it
308 foreach my $ensembl_id (@{ $old_ensembl_ids{$object_type}} ) {
309 $self->_detach_object_xref($xref_id, $dep_xref_id, $object_type, $ensembl_id, $dbi);
312 # Duplicate each dependent for the new master xref if it is the first in the chain
313 unless ($recursive) {
314 $insert_dep_x_sth->execute($new_master_xref_id, $dep_xref_id, $linkage_annotation, $linkage_source_id);
317 # Loop through all chosen (best) ensembl ids mapped to priority xref, and connect them with object_xrefs
318 foreach my $ensembl_id (@{ $ensembl_ids{$object_type} }) {
319 # Add new object_xref for each best_ensembl_id.
320 $insert_dep_ox_sth->execute($new_master_xref_id, $object_type, $ensembl_id, $dep_xref_id);
# Look the row up again by its column values to get its object_xref_id, then
# give each matching row an identity_xref entry.
322 $dep_ox_sth->execute($new_master_xref_id, $object_type, $ensembl_id, $dep_xref_id);
323 $dep_ox_sth->bind_columns(\$new_object_xref_id);
324 while ($dep_ox_sth->fetch()) {
325 $insert_ix_sth->execute($new_object_xref_id);
# A dependent that is its own master is not queued again.
328 unless ($dep_xref_id == $xref_id) {
329 push @master_xrefs, $dep_xref_id; # remember chained dependent xrefs
# Release the statement handles.
# NOTE(review): no finish() call for $dep_sth is visible in this excerpt; confirm it exists.
335 $matching_ens_sth->finish();
337 $insert_dep_x_sth->finish();
338 $insert_dep_ox_sth->finish();
339 $dep_ox_sth->finish();
340 $insert_ix_sth->finish();
343 # Delete identity xrefs for a given object xref
344 # Set unimportant object_xrefs to FAILED_PRIORITY, and delete all those that remain
# _detach_object_xref($xref_id, $dep_xref_id, $object_type, $ensembl_id, $dbi)
#
# Removes the mapping between one ensembl object and a dependent xref under a
# given master xref. Three statements run in order against the same key
# (master_xref_id, ensembl_object_type, xref_id, ensembl_id):
#   1. delete the identity_xref rows joined to the matching object_xref rows
#   2. UPDATE IGNORE matching DUMP_OUT object_xref rows to FAILED_PRIORITY
#   3. delete any matching object_xref rows that are still DUMP_OUT
#
# Parameters (as unpacked below):
#   $xref_id      - bound to master_xref_id in every statement
#   $dep_xref_id  - bound to the xref_id column in every statement
#   $object_type  - bound to ensembl_object_type
#   $ensembl_id   - bound to ensembl_id
#   $dbi          - handle on which the statements are prepared
#
# NOTE(review): process_dependents calls this as a method
# ($self->_detach_object_xref(...)), but no unpacking of the invocant is visible
# in this excerpt. The closing ");" of each prepare() call and the sub's closing
# brace are not visible either. Confirm against the complete file.
345 sub _detach_object_xref {
347 my ($xref_id, $dep_xref_id, $object_type, $ensembl_id, $dbi) = @_;
348 # Drop all the identity and go xrefs for the dependents of an xref
349 my $remove_dep_ox_sth = $dbi->prepare(
350 "DELETE ix FROM object_xref ox \
351 LEFT JOIN identity_xref ix ON ix.object_xref_id = ox.object_xref_id \
352 WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ensembl_id = ?"
354 # Fail the object_xrefs that did link to the deleted identity/go xrefs.
355 # This only updates one of potentially many, due to table constraints.
356 my $update_dep_ox_sth = $dbi->prepare(
357 "UPDATE IGNORE object_xref SET ox_status = 'FAILED_PRIORITY' \
358 WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ox_status = 'DUMP_OUT' AND ensembl_id = ?"
360 # This deletes everything left behind by the previous query.
361 my $clean_dep_ox_sth = $dbi->prepare(
362 "DELETE FROM object_xref \
363 WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ox_status = 'DUMP_OUT' AND ensembl_id = ?"
# Bind order for all three statements: master xref, object type, dependent xref, ensembl id.
366 $remove_dep_ox_sth->execute($xref_id, $object_type, $dep_xref_id, $ensembl_id);
367 # change status of object_xref to FAILED_PRIORITY for record keeping
368 $update_dep_ox_sth->execute($xref_id, $object_type, $dep_xref_id, $ensembl_id);
369 # delete the duplicates.
370 $clean_dep_ox_sth->execute($xref_id, $object_type, $dep_xref_id, $ensembl_id);
# Release the statement handles.
372 $remove_dep_ox_sth->finish();
373 $update_dep_ox_sth->finish();
374 $clean_dep_ox_sth->finish();