ensembl-hive  2.8.1
ProcessPrioritys.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefMapper::ProcessPrioritys;
21 use strict;
22 
23 use vars '@ISA';
24 @ISA = qw{ XrefMapper::BasicMapper };
25 
26 use warnings;
28 
29 use Cwd;
30 use DBI;
31 use File::Basename;
32 use IPC::Open3;
33 
34 # Process the priority xrefs.
35 
36 #
37 # 1) create a list of source "names" that are priority xrefs
38 #
39 # 2) Just to be sure set all ox_status in object_xref to 'DUMP_OUT'
40 # set dumped in xref to NULL
41 #
42 # 3) for each of the source names
43 # set ox_status to 'FAILED_PRIORITY' for those not the best match
44 # Also do this for its dependents
45 #
46 
# Constructor.
#
# Arguments:
#   $class  - class name to bless the new object into
#   $mapper - object providing xref() and verbose() accessors
#
# Returns a new object whose xref and verbose attributes are copied
# from $mapper. The mapper's core() value is not copied.
sub new {
    my ($class, $mapper) = @_;

    my $self = bless {}, $class;

    # Copy each attribute through its accessor, xref first, then verbose.
    foreach my $attribute (qw(xref verbose)) {
        $self->$attribute($mapper->$attribute);
    }

    return $self;
}
57 
# Return the names of all sources that must be treated as priority sources.
#
# The query yields one row per distinct (priority_description, name) pair
# among the sources that have at least one xref. A source name is reported
# when it comes back more than once, i.e. when it exists with more than one
# priority_description.
#
# Arguments:
#   $dbi - database handle for the xref database
#
# Returns the list of source names, each listed once, in the order the
# query returns them (sorted by name).
# Dies if the statement cannot be prepared or executed.
sub get_priority_names {
    my ($self, $dbi) = @_;

    my $psth = $dbi->prepare("select s.priority_description, s.name from source s, xref x where x.source_id = s.source_id group by s.priority_description, s.name order by s.name") || die "prepare failed";
    $psth->execute() || die "execute failed";

    my ($desc, $name);
    $psth->bind_columns(\$desc, \$name);

    my @names;
    my %row_count_for;

    # Count rows per name rather than comparing against a sentinel "previous
    # name", so that no real source name can collide with the sentinel.
    # A name is recorded exactly once, when its second row shows up.
    while ($psth->fetch()) {
        push @names, $name if ++$row_count_for{$name} == 2;
    }

    return @names;
}
81 
82 
# Flag the lower-priority xrefs of every priority source.
#
# For each source name returned by get_priority_names():
#   1) xrefs of that source without any object_xref row get
#      dumped = 'NO_DUMP_ANOTHER_PRIORITY';
#   2) the object_xrefs of that source are walked ordered by accession,
#      priority, summed identity and xref_id. The first DUMP_OUT row seen
#      for an accession defines the "best" xref. Rows of the same accession
#      that belong to another xref are set to FAILED_PRIORITY (when they
#      were DUMP_OUT), their xref is flagged as not dumped, and alignment
#      data, synonyms and dependents are carried over to the best xref.
# Finally a 'prioritys_flagged' row is written to process_status.
#
# Takes no arguments besides $self; the database handle comes from
# $self->xref->dbc.
sub process {
    my ($self) = @_;

    my $dbi   = $self->xref->dbc;
    my @names = $self->get_priority_names($dbi);

    if ($self->verbose) {
        print "The following will be processed as priority xrefs\n";
        print "\t$_\n" foreach @names;
    }

    # Single-quoted SQL string literals, like every other statement here;
    # double quotes are identifiers when MySQL runs with ANSI_QUOTES.
    my $update_ox_sth = $dbi->prepare("update object_xref set ox_status = 'FAILED_PRIORITY' where object_xref_id = ?");
    my $update_x_sth  = $dbi->prepare("update xref set dumped = 'NO_DUMP_ANOTHER_PRIORITY' where xref_id = ?");

    # 1) Set to failed all those that have no object xrefs.

    my $f_sql =(<<FSQL);
 SELECT x.xref_id
 FROM source s, xref x
 LEFT JOIN object_xref ox ON ox.xref_id = x.xref_id
 WHERE x.source_id = s.source_id
 AND s.name = ?
 AND ox.object_xref_id is null
FSQL

    my $f_sth = $dbi->prepare($f_sql);
    foreach my $name (@names) {
        $f_sth->execute($name);
        my ($xref_id);
        $f_sth->bind_columns(\$xref_id);
        while ($f_sth->fetch()) {
            $update_x_sth->execute($xref_id);
        }
    }
    $f_sth->finish;

    #
    # Now ALL object_xrefs have an identity_xref :-)
    # So we can do a straight join and treat all info_types the same way.
    #
    my $new_sql =(<<NEWS);
 SELECT ox.object_xref_id, x.accession, x.xref_id, (ix.query_identity + ix.target_identity) as identity, ox.ox_status, ox.ensembl_object_type, ensembl_id, info_type
 FROM object_xref ox, xref x, source s, identity_xref ix
 WHERE ox.object_xref_id = ix.object_xref_id
 AND ox.xref_id = x.xref_id
 AND s.source_id = x.source_id
 AND s.name = ?
 ORDER BY x.accession DESC, s.priority ASC , identity DESC, x.xref_id DESC
NEWS
    my $new_sth = $dbi->prepare($new_sql);

    #
    # Query to copy identity_xref values from one xref to another.
    # This is to keep alignment information even if the alignment was not
    # the best match.
    #
    my $idx_copy_sql = (<<IDXCP);
 UPDATE identity_xref SET query_identity = ?, target_identity = ?, hit_start = ?, hit_end = ?, translation_start = ?, translation_end = ?, cigar_line = ?, score = ?, evalue = ?
 WHERE object_xref_id = ?;
IDXCP

    my $idx_copy_sth = $dbi->prepare($idx_copy_sql);

    #
    # Query to copy synonyms from one xref to another
    #
    my $syn_copy_sql = (<<SYNCP);
 INSERT IGNORE INTO synonym (SELECT ?, synonym FROM synonym
 WHERE xref_id = ?);
SYNCP

    my $syn_copy_sth = $dbi->prepare($syn_copy_sql);

    my $best_ox_sth = $dbi->prepare("SELECT object_xref_id FROM object_xref WHERE xref_id = ? and ensembl_object_type = ? and ensembl_id = ?");

    my $seq_score_sql = (<<SEQCP);
 SELECT query_identity, target_identity, hit_start, hit_end, translation_start, translation_end, cigar_line, score, evalue
 FROM identity_xref WHERE object_xref_id = ?
SEQCP
    my $seq_score_sth = $dbi->prepare($seq_score_sql);

    foreach my $name (@names) {
        $new_sth->execute($name);
        my ($object_xref_id, $acc, $xref_id, $identity, $status, $object_type, $ensembl_id, $info_type);
        $new_sth->bind_columns(\$object_xref_id, \$acc, \$xref_id, \$identity, \$status, \$object_type, \$ensembl_id, \$info_type);

        my $last_acc     = "";     # accession of the current best xref
        my $last_name    = "";     # accession of the xrefs collected in @gone
        my $best_xref_id = undef;  # xref_id of the first DUMP_OUT row of $last_acc
        my $last_xref_id = 0;      # last non-best xref_id handled for $last_acc
        my $seen         = 0;      # true when the current xref_id was just handled
        my @gone;                  # xref_ids seen for an accession before its best xref

        while ($new_sth->fetch) {
            if ($last_acc eq $acc) {
                if ($xref_id != $best_xref_id) {
                    # We've already seen this accession before, and this
                    # xref_id is not the best one.
                    $seen         = ($xref_id == $last_xref_id);
                    $last_xref_id = $xref_id;

                    # If xref is a sequence_match, we want to copy the
                    # alignment identity_xref to prioritised mappings of the
                    # same ensembl_id.
                    if (defined $info_type && $info_type eq 'SEQUENCE_MATCH') {
                        my ($query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue, $best_object_xref_id);

                        $seq_score_sth->execute($object_xref_id);
                        $seq_score_sth->bind_columns(\$query_identity, \$target_identity, \$hit_start, \$hit_end, \$translation_start, \$translation_end, \$cigar_line, \$score, \$evalue);
                        my $have_alignment = $seq_score_sth->fetch();

                        $best_ox_sth->execute($best_xref_id, $object_type, $ensembl_id);
                        $best_ox_sth->bind_columns(\$best_object_xref_id);
                        my $have_best = $best_ox_sth->fetch();

                        # Only copy when both lookups returned a row:
                        # without alignment values the best row would be
                        # overwritten with NULLs, and without a best row
                        # there is nothing to update.
                        if ($have_alignment && $have_best) {
                            $idx_copy_sth->execute($query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue, $best_object_xref_id);
                        }
                    }

                    # If the xref is marked DUMP_OUT, set it to FAILED_PRIORITY
                    if ($status eq "DUMP_OUT") {
                        $update_ox_sth->execute($object_xref_id);

                        # If it is the first time processing this xref_id,
                        # also process dependents and update status.
                        if (!$seen) {
                            $update_x_sth->execute($xref_id);
                            # Copy synonyms across if they are missing
                            $syn_copy_sth->execute($best_xref_id, $xref_id);
                            $self->process_dependents($xref_id, $best_xref_id, $dbi);
                        }
                    }
                    else { # not DUMP_OUT
                        $update_x_sth->execute($xref_id);
                    }
                }
                else {
                    # Another mapping of the best xref itself. If its
                    # alignment did not pass, dismiss the row entirely.
                    next if $status eq 'FAILED_CUTOFF';
                }

                # Best priority failed, so another one was found: flag the
                # xrefs collected for this accession as not dumped.
                if (@gone and $last_name eq $acc) {
                    foreach my $d (@gone) {
                        $update_x_sth->execute($d);
                    }
                }
            }
            else { # accession differs from the current best one
                if ($status eq "DUMP_OUT") {
                    # First DUMP_OUT row of this accession: it is the best.
                    $last_acc     = $acc;
                    $best_xref_id = $xref_id;
                    if (@gone and $last_name eq $acc) {
                        foreach my $d (@gone) {
                            $update_x_sth->execute($d);
                        }
                        @gone = ();
                    }
                }
                else { # not DUMP_OUT: remember it until a best xref shows up
                    if ($last_name ne $acc) { @gone = () } # new accession
                    push @gone, $xref_id;
                    $last_name = $acc;
                }
            }
        }
    }
    $new_sth->finish;

    $update_ox_sth->finish;
    $update_x_sth->finish;
    $seq_score_sth->finish;
    $best_ox_sth->finish;
    $idx_copy_sth->finish;
    $syn_copy_sth->finish;

    my $sth = $dbi->prepare("insert into process_status (status, date) values('prioritys_flagged',now())");
    $sth->execute();
    $sth->finish;
}
259 
# Move the dependent xrefs of a lower-priority master xref over to the
# master xref that won the priority comparison.
#
# Arguments:
#   $old_master_xref_id - xref whose dependents are taken away
#   $new_master_xref_id - xref that receives the dependents
#   $dbi                - database handle for the xref database
#
# Dependents of dependents are followed as well; for those chained levels
# the dependent processed on the previous level acts as the master.
sub process_dependents {
    my ($self, $old_master_xref_id, $new_master_xref_id, $dbi) = @_;

    my $mapped_ids_sth = $dbi->prepare("select distinct ensembl_object_type, ensembl_id from object_xref where ox_status not in ('FAILED_CUTOFF') and xref_id = ? order by ensembl_object_type");
    my $dependents_sth = $dbi->prepare("select distinct dx.dependent_xref_id, dx.linkage_annotation, dx.linkage_source_id, ox.ensembl_object_type from dependent_xref dx, object_xref ox where ox.xref_id = dx.dependent_xref_id and ox.master_xref_id = dx.master_xref_id and dx.master_xref_id = ? order by ox.ensembl_object_type");
    my $add_dependent_sth = $dbi->prepare("insert into dependent_xref(master_xref_id, dependent_xref_id, linkage_annotation, linkage_source_id) values(?, ?, ?, ?)");
    my $add_object_xref_sth = $dbi->prepare("insert ignore into object_xref(master_xref_id, ensembl_object_type, ensembl_id, linkage_type, ox_status, xref_id) values(?, ?, ?, 'DEPENDENT', 'DUMP_OUT', ?)");
    my $find_object_xref_sth = $dbi->prepare("select object_xref_id from object_xref where master_xref_id = ? and ensembl_object_type = ? and ensembl_id = ? and linkage_type = 'DEPENDENT' AND ox_status = 'DUMP_OUT' and xref_id = ?");
    my $add_identity_sth = $dbi->prepare("insert ignore into identity_xref(object_xref_id, query_identity, target_identity) values(?, 100, 100)");

    # Collect, per object type, the Ensembl ids a master xref is mapped to
    # (mappings with status FAILED_CUTOFF are left out by the query).
    my $ids_by_type = sub {
        my ($master_id) = @_;
        my (%ids_for, $type, $id);
        $mapped_ids_sth->execute($master_id);
        $mapped_ids_sth->bind_columns(\$type, \$id);
        while ($mapped_ids_sth->fetch()) {
            push @{ $ids_for{$type} }, $id;
        }
        return \%ids_for;
    };

    my $new_ids_for = $ids_by_type->($new_master_xref_id);
    my $old_ids_for = $ids_by_type->($old_master_xref_id);

    my ($dependent_id, $annotation, $source_id, $type, $created_ox_id);

    my @pending = ($old_master_xref_id);   # masters still to be walked
    my $chained = 0;                       # true once past the first level

    while (my $master_id = pop @pending) {

        # Get dependent xrefs, be they gene, transcript or translation
        $dependents_sth->execute($master_id);
        $dependents_sth->bind_columns(\$dependent_id, \$annotation, \$source_id, \$type);

        # Below the first level the xref being walked is itself the master
        # of the object_xrefs created for its dependents.
        $new_master_xref_id = $master_id if $chained;

        while ($dependents_sth->fetch()) {

            # Remove the mappings that go through the low priority xref,
            # for every Ensembl id the old master was mapped to.
            foreach my $old_id (@{ $old_ids_for->{$type} }) {
                $self->_detach_object_xref($master_id, $dependent_id, $type, $old_id, $dbi);
            }

            # Only the first level gets a new dependent_xref row.
            if (!$chained) {
                $add_dependent_sth->execute($new_master_xref_id, $dependent_id, $annotation, $source_id);
            }

            # Connect the dependent to every Ensembl id of the best xref,
            # and give each object_xref found for it an identity_xref row.
            foreach my $new_id (@{ $new_ids_for->{$type} }) {
                $add_object_xref_sth->execute($new_master_xref_id, $type, $new_id, $dependent_id);

                $find_object_xref_sth->execute($new_master_xref_id, $type, $new_id, $dependent_id);
                $find_object_xref_sth->bind_columns(\$created_ox_id);
                while ($find_object_xref_sth->fetch()) {
                    $add_identity_sth->execute($created_ox_id);
                }
            }

            # Remember chained dependents, except an xref depending on itself.
            push @pending, $dependent_id if $dependent_id != $master_id;
        }
        $chained = 1;
    }

    $mapped_ids_sth->finish();
    $dependents_sth->finish();
    $add_dependent_sth->finish();
    $add_object_xref_sth->finish();
    $find_object_xref_sth->finish();
    $add_identity_sth->finish();
}
342 
# Detach a dependent xref from its master for one Ensembl object.
#
# Runs three statements against the object_xref rows selected by master
# xref, object type, dependent xref and Ensembl id:
#   1) delete the identity_xref rows joined to them;
#   2) set DUMP_OUT rows to FAILED_PRIORITY (UPDATE IGNORE);
#   3) delete the rows that still have status DUMP_OUT afterwards.
#
# Arguments:
#   $xref_id     - master xref id
#   $dep_xref_id - dependent xref id
#   $object_type - ensembl_object_type of the mapping
#   $ensembl_id  - ensembl_id of the mapping
#   $dbi         - database handle for the xref database
sub _detach_object_xref {
    my ($self, $xref_id, $dep_xref_id, $object_type, $ensembl_id, $dbi) = @_;

    # Every statement binds the same four values, in placeholder order.
    my @row_key = ($xref_id, $object_type, $dep_xref_id, $ensembl_id);

    # Drop all the identity and go xrefs for the dependents of an xref
    my $drop_identity_sth = $dbi->prepare(
        "DELETE ix FROM object_xref ox \
 LEFT JOIN identity_xref ix ON ix.object_xref_id = ox.object_xref_id \
 WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ensembl_id = ?"
    );

    # Fail the object_xrefs that did link to the deleted identity/go xrefs.
    # This only updates one of potentially many, due to table constraints.
    my $fail_status_sth = $dbi->prepare(
        "UPDATE IGNORE object_xref SET ox_status = 'FAILED_PRIORITY' \
 WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ox_status = 'DUMP_OUT' AND ensembl_id = ?"
    );

    # This deletes everything left behind by the previous query.
    my $purge_leftover_sth = $dbi->prepare(
        "DELETE FROM object_xref \
 WHERE master_xref_id = ? AND ensembl_object_type = ? AND xref_id = ? AND ox_status = 'DUMP_OUT' AND ensembl_id = ?"
    );

    $drop_identity_sth->execute(@row_key);
    # Keep one FAILED_PRIORITY row for record keeping ...
    $fail_status_sth->execute(@row_key);
    # ... and delete the duplicates.
    $purge_leftover_sth->execute(@row_key);

    $drop_identity_sth->finish();
    $fail_status_sth->finish();
    $purge_leftover_sth->finish();

}
377 
378 
379 1;
XrefMapper::BasicMapper
Definition: BasicMapper.pm:8
accession
public accession()
XrefMapper::BasicMapper::xref
public XrefMapper::db xref()