3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefMapper::ProcessPaired;
35 my($class, $mapper) = @_;
39 $self->
xref($mapper->xref);
40 $self->verbose($mapper->verbose);
# Reconcile paired RefSeq mRNA / RefSeq peptide cross-references.
#
# Pass 1: take every 'DUMP_OUT' transcript object xref whose source name
#         matches RefSeq_mRNA% (ccds priority or refseq sequence matched),
#         find the paired RefSeq peptide accession in the pairs table and
#         check whether the transcript's translation is already linked to it.
#         If it is not, add an 'INFERRED_PAIR' object xref for the translation.
# Pass 2: set ox_status to 'MULTI_DELETE' for every translation RefSeq_peptide%
#         object xref for which a better object xref exists. A better object
#         xref is one whose translation belongs to a transcript linked to the
#         paired RefSeq mRNA. Dependent object xrefs are handled through
#         process_dependents().
#
# Finally a 'processed_pairs' row is written to process_status.
#
# Arguments : $self - object providing verbose(), xref() (whose dbc() returns
#             the database connection) and process_dependents().
# Returns   : nothing meaningful.
# Dies      : when the paired peptide xref cannot be found, or when an insert
#             or update reports failure.
#
# NOTE(review): the `sub` line is not part of the visible listing; the name
# `process` and the single $self argument are assumed - confirm against the
# caller before applying.
sub process {
    my ($self) = @_;

    print "Process Pairs\n" if ($self->verbose);
    my $dbi = $self->xref->dbc;

    # Transcript RefSeq_mRNA% object xrefs, together with the paired peptide
    # accession and the translation id of the transcript.
    # q{} is used on purpose: inside a double-quoted Perl string "\_" collapses
    # to "_" and the underscore would reach the server as a LIKE wildcard.
    my $transcr_obj_xrefs_sth = $dbi->prepare(q{
        select gtt.translation_id, p.source_id, p.accession1,
               ix.query_identity, ix.target_identity
          from object_xref ox
          join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT')
          join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_mRNA%')
          join pairs p on (x.accession = p.accession2)
          join gene_transcript_translation gtt on (gtt.transcript_id = ox.ensembl_id)
          join identity_xref ix using(object_xref_id)
    });

    # Is the translation already linked to the paired peptide with a status of
    # 'DUMP_OUT' or 'FAILED_PRIORITY'?
    my $ox_translation_sth = $dbi->prepare(q{
        select ox.object_xref_id, ox.xref_id
          from object_xref ox
          join xref x using(xref_id)
         where ox.ox_status in ('DUMP_OUT', 'FAILED_PRIORITY')
           and ox.ensembl_object_type = 'Translation'
           and ox.ensembl_id = ?
           and x.source_id = ?
           and x.accession = ?
    });

    my $ox_insert_sth = $dbi->prepare(
        "insert into object_xref (xref_id, ensembl_id, ensembl_object_type, linkage_type, ox_status) values(?, ?, ?, 'INFERRED_PAIR', 'DUMP_OUT')");
    my $get_object_xref_id_sth = $dbi->prepare(
        "select object_xref_id from object_xref where xref_id = ? and ensembl_id = ? and ensembl_object_type = ? and linkage_type = 'INFERRED_PAIR' and ox_status = 'DUMP_OUT'");
    my $xref_sth = $dbi->prepare(
        "select xref_id from xref where accession = ? and source_id = ?");
    my $xref_update_sth = $dbi->prepare(
        "update xref set info_type = 'INFERRED_PAIR' where xref_id = ?");
    my $identity_update_sth = $dbi->prepare(
        "insert into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)");

    # 'DUMP_OUT' translation object xrefs to RefSeq_peptide% sources, with the
    # transcript each translation belongs to.
    my $transl_object_xrefs_sth = $dbi->prepare(q{
        select ox.object_xref_id, ox.ensembl_id, x.accession, gtt.transcript_id
          from gene_transcript_translation gtt
          join object_xref ox on (gtt.translation_id = ox.ensembl_id and ox.ensembl_object_type = 'Translation')
          join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT')
          join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_peptide%')
    });

    my $ox_mark_delete_sth = $dbi->prepare(
        "update object_xref set ox_status = 'MULTI_DELETE' where object_xref_id = ?");

    my %change;

    # Peptide accession => { translation_id => 1 } for every translation that
    # is linked to the peptide AND whose transcript is linked to the paired
    # RefSeq mRNA. Pass 2 uses it to drop peptide object xrefs lacking that
    # additional support.
    my %supported_translations;

    $transcr_obj_xrefs_sth->execute();
    while (my ($translation_id, $pep_source_id, $pep_accession, $query_identity, $target_identity)
               = $transcr_obj_xrefs_sth->fetchrow_array()) {

        # Transcripts without a translation cannot carry a peptide xref.
        next unless $translation_id;

        # Check whether the translation is linked to the paired RefSeq peptide.
        $ox_translation_sth->execute($translation_id, $pep_source_id, $pep_accession);
        my ($transl_object_xref_id) = $ox_translation_sth->fetchrow_array();

        # If it is already linked there is nothing to add.
        if (!$transl_object_xref_id) {
            $xref_sth->execute($pep_accession, $pep_source_id);
            my ($xref_id) = $xref_sth->fetchrow_array();
            die("Xref not found for accession $pep_accession source_id $pep_source_id")
                unless $xref_id;

            # The new row's id is not known yet, so it is left out of the message.
            $ox_insert_sth->execute($xref_id, $translation_id, 'Translation')
                or die "Could not insert object xref: xref_id $xref_id, translation_id $translation_id";

            $get_object_xref_id_sth->execute($xref_id, $translation_id, 'Translation');
            my ($object_xref_id) = $get_object_xref_id_sth->fetchrow_array();

            $xref_update_sth->execute($xref_id)
                or die "Could not update xref_id $xref_id";

            # Carry the transcript alignment identities over to the new link.
            if ($query_identity && $target_identity) {
                $identity_update_sth->execute($object_xref_id, $query_identity, $target_identity);
            }

            $change{'translation object xrefs added'}++;
            $transl_object_xref_id = $object_xref_id;
        }

        if ($transl_object_xref_id) {
            $supported_translations{$pep_accession}{$translation_id} = 1;
        }
    }

    $transcr_obj_xrefs_sth->finish();
    $ox_translation_sth->finish();
    $ox_insert_sth->finish();
    $get_object_xref_id_sth->finish();
    $xref_sth->finish();
    $xref_update_sth->finish();
    $identity_update_sth->finish();

    # Go through the RefSeq_peptide% object xrefs.
    $transl_object_xrefs_sth->execute();
    while (my ($translation_object_xref_id, $translation_id, $pep_accession, $transcript_id)
               = $transl_object_xrefs_sth->fetchrow_array()) {

        # Accessions with no paired support at all are left untouched.
        my $supported = $supported_translations{$pep_accession};
        next unless $supported;

        # This translation is one of the supported ones - keep it.
        next if $supported->{$translation_id};

        # This translation's transcript is not matched with the paired
        # RefSeq_mRNA%, so change the status to 'MULTI_DELETE'.
        $ox_mark_delete_sth->execute($translation_object_xref_id)
            or die("Failed to update status to 'MULTI_DELETE' for object_xref_id $translation_object_xref_id");

        # Process all dependent xrefs as well.
        $self->process_dependents($translation_object_xref_id, $translation_id, $transcript_id, $dbi);

        $change{'translation object xrefs removed'}++;
    }

    $transl_object_xrefs_sth->finish();
    $ox_mark_delete_sth->finish();

    if ($self->verbose) {
        foreach my $key (sort keys %change) {
            print "$key:\t" . $change{$key} . "\n";
        }
    }

    # Update process status.
    my $sth_stat = $dbi->prepare(
        "insert into process_status (status, date) values('processed_pairs',now())");
    $sth_stat->execute();
    $sth_stat->finish();

    return;
}
# Mark every object xref that depends on a removed translation object xref as
# 'MULTI_DELETE', following chains of dependents.
#
# Starting from $translation_object_xref_id, the routine repeatedly looks up
# 'DUMP_OUT' object xrefs whose xref is a dependent (via dependent_xref) of the
# current master object xref and which are attached either to the given
# translation or to the given transcript. Each one found is set to
# 'MULTI_DELETE' and then treated as a master itself.
#
# Arguments : $self                       - invocant (not used by this routine)
#             $translation_object_xref_id - object_xref_id to start from
#             $translation_id             - ensembl_id for 'Translation' dependents
#             $transcript_id              - ensembl_id for 'Transcript' dependents
#             $dbi                        - connection object providing prepare()
# Returns   : nothing meaningful.
# Dies      : when the status update reports failure.
sub process_dependents {
    my ($self, $translation_object_xref_id, $translation_id, $transcript_id, $dbi) = @_;

    # One statement serves both object types; the type is bound at execute time.
    my $dependents_sth = $dbi->prepare(q{
        select distinct dependent_ox.object_xref_id
          from object_xref master_ox, object_xref dependent_ox,
               xref dependent, xref master, dependent_xref dx
         where dependent.xref_id = dx.dependent_xref_id
           and master.xref_id = dx.master_xref_id
           and dependent.xref_id = dependent_ox.xref_id
           and master.xref_id = master_ox.xref_id
           and master_ox.object_xref_id = ?
           and dependent_ox.master_xref_id = master.xref_id
           and dependent_ox.ensembl_id = ?
           and dependent_ox.ensembl_object_type = ?
           and dependent_ox.ox_status = 'DUMP_OUT'
    });
    my $ox_dx_delete_sth = $dbi->prepare(
        "update object_xref set ox_status = 'MULTI_DELETE' where object_xref_id = ?");

    # Translation dependents are handled before transcript dependents for
    # each master.
    my @targets = (
        [ 'Translation', $translation_id ],
        [ 'Transcript',  $transcript_id ],
    );

    # Work list of object xrefs still to be expanded; %seen keeps an object
    # xref from being queued twice.
    my @pending = ($translation_object_xref_id);
    my %seen    = ($translation_object_xref_id => 1);

    while (@pending) {
        my $master_id = pop @pending;

        foreach my $target (@targets) {
            my ($object_type, $ensembl_id) = @{$target};

            $dependents_sth->execute($master_id, $ensembl_id, $object_type);
            while (my ($dependent_id) = $dependents_sth->fetchrow_array()) {
                $ox_dx_delete_sth->execute($dependent_id)
                    or die("Failed to update status to 'MULTI_DELETE' for dependent object_xref_id $dependent_id");

                # A dependent may be the master of further dependents.
                next if $seen{$dependent_id}++;
                push @pending, $dependent_id;
            }
        }
    }

    $dependents_sth->finish();
    $ox_dx_delete_sth->finish();

    return;
}