my ($self) = @_;
#get all 'DUMP_OUT' transcript RefSeq object xrefs (ccds priority or refseq sequence matched)
#foreach transcript refseq find its protein pair, check if it's matched to the corresponding translation
#if it's not add 'INFERRED_PAIR' object xref
#set ox_status to 'MULTI_DELETE' for all translation RefSeq object xrefs if a better object_xref exists (better object_xref is one whose corresponding transcript is linked to the paired RefSeq_mRNA)
print "Process Pairs\n" if($self->verbose);
my $dbi = $self->xref->dbc;
my $object_xref_id;
#this query gives us transcript RefSeq_mRNA% object xrefs, and the paired RefSeq_peptide% accession as well as the translation id for the transcript
my $transcr_obj_xrefs_sth = $dbi->prepare("select gtt.translation_id, p.source_id, p.accession1, ix.query_identity, ix.target_identity from object_xref ox join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT') join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_mRNA%') join pairs p on (x.accession = p.accession2) join gene_transcript_translation gtt on (gtt.transcript_id = ox.ensembl_id) join identity_xref ix using(object_xref_id)");
#this query is used to check if an object_xref exists for the related translation and paired RefSeq_peptide% with a status of 'DUMP_OUT' or 'FAILED_PRIORITY'
my $ox_translation_sth = $dbi->prepare("select ox.object_xref_id, ox.xref_id from object_xref ox join xref x using(xref_id) where ox.ox_status in ('DUMP_OUT', 'FAILED_PRIORITY') and ox.ensembl_object_type = 'Translation' and ox.ensembl_id = ? and x.source_id = ? and x.accession = ?");
my $ox_insert_sth = $dbi->prepare("insert into object_xref (xref_id, ensembl_id, ensembl_object_type, linkage_type, ox_status) values(?, ?, ?, 'INFERRED_PAIR', 'DUMP_OUT')");
my $get_object_xref_id_sth = $dbi->prepare("select object_xref_id from object_xref where xref_id = ? and ensembl_id = ? and ensembl_object_type = ? and linkage_type = 'INFERRED_PAIR' and ox_status = 'DUMP_OUT'");
my $xref_sth = $dbi->prepare("select xref_id from xref where accession = ? and source_id = ?");
my $xref_update_sth = $dbi->prepare("update xref set info_type = 'INFERRED_PAIR' where xref_id = ?");
my $identity_update_sth = $dbi->prepare("insert into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)");
my $transl_object_xrefs_sth = $dbi->prepare("select ox.object_xref_id, ox.ensembl_id, x.accession, gtt.transcript_id from gene_transcript_translation gtt join object_xref ox on (gtt.translation_id = ox.ensembl_id and ox.ensembl_object_type = 'Translation') join xref x on (ox.xref_id = x.xref_id and ox.ox_status = 'DUMP_OUT' and ox.ensembl_object_type = 'Translation') join source s on (x.source_id = s.source_id and s.name like 'RefSeq\_peptide%')");
my $ox_mark_delete_sth = $dbi->prepare("update object_xref set ox_status = 'MULTI_DELETE' where object_xref_id = ?");
$transcr_obj_xrefs_sth->execute();
my %change;
#this hash stores all the translations linked to RefSeq_peptide% xrefs whose transcript is also linked to the paired RefSeq_mRNA - this will be needed to get rid of RefSeq_peptide object xrefs which don't have the additional support of transcripts linked to paired RefSeq_mRNAs; keyed on RefSeq_peptide% accession
my %RefSeq_pep_translation;
while(my ($translation_id, $pep_source_id, $pep_accession, $query_identity, $target_identity) = $transcr_obj_xrefs_sth->fetchrow_array() ){
#check if translation is linked to the paired RefSeq peptide
if ($translation_id) {
$ox_translation_sth->execute($translation_id, $pep_source_id, $pep_accession);
my ($transl_object_xref_id, $xref_id) = $ox_translation_sth->fetchrow_array();
#if it's already linked we don't have to do anything
if (!$transl_object_xref_id) {
#add a new object xref
$xref_sth->execute($pep_accession, $pep_source_id);
($xref_id) = $xref_sth->fetchrow_array();
if (!$xref_id) {
die("Xref not found for accession $pep_accession source_id $pep_source_id");
}
$ox_insert_sth->execute($xref_id, $translation_id, "Translation") || die "Could not insert object xref $object_xref_id: xref_id $xref_id, translation_id $translation_id" ;
$get_object_xref_id_sth->execute($xref_id, $translation_id, 'Translation');
$object_xref_id = ($get_object_xref_id_sth->fetchrow_array())[0];
$xref_update_sth->execute($xref_id)|| die "Could not update xref_id $xref_id";
if ($query_identity && $target_identity) {
$identity_update_sth->execute($object_xref_id, $query_identity, $target_identity);
}
$change{'translation object xrefs added'}++;
$transl_object_xref_id = $object_xref_id;
}
if ($transl_object_xref_id) {
push @{$RefSeq_pep_translation{$pep_accession}}, $translation_id;
}
}
}
$transcr_obj_xrefs_sth->finish();
$ox_translation_sth->finish();
$ox_insert_sth->finish();
$xref_update_sth->finish();
$identity_update_sth->finish();
$xref_sth->finish();
#go through RefSeq_peptide% object_xrefs
$transl_object_xrefs_sth->execute();
while (my ($translation_object_xref_id, $translation_id, $pep_accession, $transcript_id) = $transl_object_xrefs_sth->fetchrow_array() ) {
if (exists($RefSeq_pep_translation{$pep_accession}) ) {
my $found = 0;
foreach my $tr_id (@{$RefSeq_pep_translation{$pep_accession}}) {
if ($tr_id == $translation_id) {
$found = 1;
}
}
if (!$found) {
#this translation's transcript is not matched with the paired RefSeq_mRNA%,
#change the status to 'MULTI_DELETE'
$ox_mark_delete_sth->execute($translation_object_xref_id) || die("Failed to update status to 'MULTI_DELETE for object_xref_id $translation_object_xref_id");
# Process all dependent xrefs as well
$self->process_dependents($translation_object_xref_id, $translation_id, $transcript_id, $dbi);
$change{'translation object xrefs removed'}++;
}
}
}
$transl_object_xrefs_sth->finish();
$ox_mark_delete_sth->finish();
foreach my $key (keys %change){
print "$key:\t".$change{$key}."\n" if($self->verbose);
}
#update process status
my $sth_stat = $dbi->prepare("insert into process_status (status, date) values('processed_pairs',now())");
$sth_stat->execute();
$sth_stat->finish;
}