ensembl-hive  2.8.1
ProcessPaired.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefMapper::ProcessPaired;
21 use strict;
22 
23 use vars '@ISA';
24 @ISA = qw{ XrefMapper::BasicMapper };
25 
26 use warnings;
28 
29 use Cwd;
30 use DBI;
31 use File::Basename;
32 use IPC::Open3;
33 
# Constructor.
#
# Arguments:
#   $class  - class name to bless the new object into
#   $mapper - object providing xref() and verbose() accessors; both values
#             are copied onto the new instance through the accessors of the
#             same name that this class is expected to inherit
# Returns: the newly blessed object.
sub new {
    my ($class, $mapper) = @_;

    my $self = bless {}, $class;

    # Copy the mapper's settings onto the new instance.
    $self->xref( $mapper->xref );
    $self->verbose( $mapper->verbose );

    return $self;
}
43 
44 
# Reconcile paired RefSeq mRNA / peptide object xrefs.
#
# Pass 1: select every 'DUMP_OUT' object_xref whose xref belongs to a source
#         named like RefSeq_mRNA%, together with the paired accession from
#         the pairs table and the translation of the linked transcript.
#         If that translation has no object_xref ('DUMP_OUT' or
#         'FAILED_PRIORITY') to the paired xref, insert one with linkage
#         type 'INFERRED_PAIR', flag the xref as 'INFERRED_PAIR' and copy
#         the identity values across when both are set.
# Pass 2: for every 'DUMP_OUT' Translation object_xref to a RefSeq_peptide%
#         xref whose accession was seen in pass 1, set ox_status to
#         'MULTI_DELETE' (and do the same for its dependents) unless that
#         exact translation was one of the supported ones from pass 1.
#
# Finally a 'processed_pairs' row is written to process_status.
#
# Arguments: none besides the invocant.
# Dies if a required xref is missing or if an insert/update fails.
sub process {
    my ($self) = @_;

    print "Process Pairs\n" if ($self->verbose);
    my $dbi = $self->xref->dbc;

    # NOTE(review): inside a double-quoted Perl string "\_" is passed through
    # as a plain "_", so the LIKE patterns below reach the server with "_"
    # acting as a single-character wildcard. The SQL is deliberately left
    # untouched here - confirm whether a literal underscore was intended.

    # RefSeq_mRNA% object xrefs with the paired accession and the translation id of the transcript
    my $transcr_obj_xrefs_sth = $dbi->prepare("select gtt.translation_id, p.source_id, p.accession1, ix.query_identity, ix.target_identity from object_xref ox join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT') join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_mRNA%') join pairs p on (x.accession = p.accession2) join gene_transcript_translation gtt on (gtt.transcript_id = ox.ensembl_id) join identity_xref ix using(object_xref_id)");

    # checks whether an object_xref already exists for the translation and the paired xref
    # with a status of 'DUMP_OUT' or 'FAILED_PRIORITY'
    my $ox_translation_sth = $dbi->prepare("select ox.object_xref_id, ox.xref_id from object_xref ox join xref x using(xref_id) where ox.ox_status in ('DUMP_OUT', 'FAILED_PRIORITY') and ox.ensembl_object_type = 'Translation' and ox.ensembl_id = ? and x.source_id = ? and x.accession = ?");

    my $ox_insert_sth = $dbi->prepare("insert into object_xref (xref_id, ensembl_id, ensembl_object_type, linkage_type, ox_status) values(?, ?, ?, 'INFERRED_PAIR', 'DUMP_OUT')");
    my $get_object_xref_id_sth = $dbi->prepare("select object_xref_id from object_xref where xref_id = ? and ensembl_id = ? and ensembl_object_type = ? and linkage_type = 'INFERRED_PAIR' and ox_status = 'DUMP_OUT'");

    my $xref_sth = $dbi->prepare("select xref_id from xref where accession = ? and source_id = ?");

    my $xref_update_sth = $dbi->prepare("update xref set info_type = 'INFERRED_PAIR' where xref_id = ?");
    my $identity_update_sth = $dbi->prepare("insert into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)");

    my $transl_object_xrefs_sth = $dbi->prepare("select ox.object_xref_id, ox.ensembl_id, x.accession, gtt.transcript_id from gene_transcript_translation gtt join object_xref ox on (gtt.translation_id = ox.ensembl_id and ox.ensembl_object_type = 'Translation') join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT' and ox.ensembl_object_type = 'Translation') join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_peptide%')");

    my $ox_mark_delete_sth = $dbi->prepare("update object_xref set ox_status = 'MULTI_DELETE' where object_xref_id = ?");

    # counters reported at the end when verbose
    my %change;

    # Translations linked to a paired xref whose transcript is also linked to
    # the paired RefSeq_mRNA: { paired accession => { translation_id => 1 } }.
    # Used in pass 2 to demote object xrefs lacking that support.
    my %supported_translation;

    $transcr_obj_xrefs_sth->execute();

    while (my ($translation_id, $pep_source_id, $pep_accession, $query_identity, $target_identity) = $transcr_obj_xrefs_sth->fetchrow_array()) {

        # nothing to pair if the transcript has no translation
        next unless $translation_id;

        # check if the translation is already linked to the paired xref
        $ox_translation_sth->execute($translation_id, $pep_source_id, $pep_accession);
        my ($transl_object_xref_id, $xref_id) = $ox_translation_sth->fetchrow_array();

        # if it's already linked there is nothing to add
        if (!$transl_object_xref_id) {

            # add a new object xref
            $xref_sth->execute($pep_accession, $pep_source_id);
            ($xref_id) = $xref_sth->fetchrow_array();
            if (!$xref_id) {
                die("Xref not found for accession $pep_accession source_id $pep_source_id");
            }

            # the new object_xref_id is not known yet, so it cannot be part of this message
            $ox_insert_sth->execute($xref_id, $translation_id, "Translation")
                or die "Could not insert object xref: xref_id $xref_id, translation_id $translation_id";

            # read back the id of the row that was just inserted
            $get_object_xref_id_sth->execute($xref_id, $translation_id, 'Translation');
            my ($object_xref_id) = $get_object_xref_id_sth->fetchrow_array();
            if (!$object_xref_id) {
                die "Could not retrieve inserted object xref: xref_id $xref_id, translation_id $translation_id";
            }

            $xref_update_sth->execute($xref_id) or die "Could not update xref_id $xref_id";

            if ($query_identity && $target_identity) {
                $identity_update_sth->execute($object_xref_id, $query_identity, $target_identity);
            }

            $change{'translation object xrefs added'}++;
            $transl_object_xref_id = $object_xref_id;
        }

        # at this point the translation is linked (pre-existing or newly inserted)
        $supported_translation{$pep_accession}{$translation_id} = 1;
    }

    $transcr_obj_xrefs_sth->finish();
    $ox_translation_sth->finish();
    $ox_insert_sth->finish();
    $get_object_xref_id_sth->finish();
    $xref_update_sth->finish();
    $identity_update_sth->finish();
    $xref_sth->finish();

    # go through RefSeq_peptide% object_xrefs
    $transl_object_xrefs_sth->execute();
    while (my ($translation_object_xref_id, $translation_id, $pep_accession, $transcript_id) = $transl_object_xrefs_sth->fetchrow_array()) {

        # accessions never seen in pass 1 are left alone
        next unless exists $supported_translation{$pep_accession};

        # this translation has the support of a paired RefSeq_mRNA% transcript
        next if $supported_translation{$pep_accession}{$translation_id};

        # this translation's transcript is not matched with the paired RefSeq_mRNA%,
        # change the status to 'MULTI_DELETE'
        $ox_mark_delete_sth->execute($translation_object_xref_id)
            or die("Failed to update status to 'MULTI_DELETE for object_xref_id $translation_object_xref_id");

        # Process all dependent xrefs as well
        $self->process_dependents($translation_object_xref_id, $translation_id, $transcript_id, $dbi);

        $change{'translation object xrefs removed'}++;
    }

    $transl_object_xrefs_sth->finish();
    $ox_mark_delete_sth->finish();

    if ($self->verbose) {
        # sorted so the summary is printed in a stable order
        foreach my $key (sort keys %change) {
            print "$key:\t".$change{$key}."\n";
        }
    }

    # update process status
    my $sth_stat = $dbi->prepare("insert into process_status (status, date) values('processed_pairs',now())");
    $sth_stat->execute();
    $sth_stat->finish;
}
169 
# Set ox_status to 'MULTI_DELETE' on every 'DUMP_OUT' object_xref that depends,
# directly or transitively, on the given master object_xref.
#
# Arguments:
#   $translation_object_xref_id - object_xref_id of the starting master
#   $translation_id             - ensembl_id that dependent Translation object_xrefs must have
#   $transcript_id              - ensembl_id that dependent Transcript object_xrefs must have
#   $dbi                        - database handle used to prepare the statements
#
# Each dependent found is itself treated as a master in turn; a seen-set
# guarantees every object_xref_id is expanded at most once.
# Dies if a status update fails.
sub process_dependents {
    my ($self, $translation_object_xref_id, $translation_id, $transcript_id, $dbi) = @_;

    my $dep_tl_sth = $dbi->prepare("select distinct dependent_ox.object_xref_id from object_xref master_ox, object_xref dependent_ox, xref dependent, xref master, dependent_xref dx where dependent.xref_id = dx.dependent_xref_id and master.xref_id = dx.master_xref_id and dependent.xref_id = dependent_ox.xref_id and master.xref_id = master_ox.xref_id and master_ox.object_xref_id = ? and dependent_ox.master_xref_id = master.xref_id and dependent_ox.ensembl_id = ? and dependent_ox.ensembl_object_type = 'Translation' and dependent_ox.ox_status = 'DUMP_OUT' ");
    my $dep_tr_sth = $dbi->prepare("select distinct dependent_ox.object_xref_id from object_xref master_ox, object_xref dependent_ox, xref dependent, xref master, dependent_xref dx where dependent.xref_id = dx.dependent_xref_id and master.xref_id = dx.master_xref_id and dependent.xref_id = dependent_ox.xref_id and master.xref_id = master_ox.xref_id and master_ox.object_xref_id = ? and dependent_ox.master_xref_id = master.xref_id and dependent_ox.ensembl_id = ? and dependent_ox.ensembl_object_type = 'Transcript' and dependent_ox.ox_status = 'DUMP_OUT' ");
    my $ox_dx_delete_sth = $dbi->prepare("update object_xref set ox_status = 'MULTI_DELETE' where object_xref_id = ?");

    # Each master is expanded first through its Translation dependents,
    # then through its Transcript dependents.
    my @lookups = (
        [ $dep_tl_sth, $translation_id ],
        [ $dep_tr_sth, $transcript_id ],
    );

    # object_xref_ids already queued, so none is expanded twice
    my %seen = ( $translation_object_xref_id => 1 );
    my @pending = ($translation_object_xref_id);

    while (my $master_id = pop @pending) {
        foreach my $lookup (@lookups) {
            my ($dep_sth, $ensembl_id) = @{$lookup};

            $dep_sth->execute($master_id, $ensembl_id);
            while (my ($dependent_id) = $dep_sth->fetchrow_array()) {
                $ox_dx_delete_sth->execute($dependent_id)
                    or die("Failed to update status to 'MULTI_DELETE' for dependent object_xref_id $dependent_id");

                # queue the dependent so its own dependents are handled too
                next if defined $seen{$dependent_id};
                $seen{$dependent_id} = 1;
                push @pending, $dependent_id;
            }
        }
    }

    # this sub runs once per demoted object_xref, so release the handles
    $dep_tl_sth->finish();
    $dep_tr_sth->finish();
    $ox_dx_delete_sth->finish();

    return;
}
205 
206 1;
XrefMapper::db::dbc
public dbc()
XrefMapper::BasicMapper
Definition: BasicMapper.pm:8
XrefMapper::BasicMapper::xref
public XrefMapper::db xref()