3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefMapper::OfficialNaming;
31 #@ISA = qw{ XrefMapper::BasicMapper };
34 ###############################################################################################
35 # Run the official naming code.
37 # At present this is done for the following species ONLY :-
38 # ZebraFish (ZFIN_ID),
43 # There is currently no official domain source for pig, but it has manual annotation
44 # We use PIGGY as a fake official naming source
46 # 1) So we find the best official name for each gene
48 # i) official domain name source (HGNC, MGI, ZFIN_ID, RGD)
53 # NOTE: for "i)" above, if more than one exists we find the "best" one if possible
54 # and remove the other ones. If there is more than one "best" we keep all and
55 # just choose the first one for the name
57 # To find the "best" one we use the priority.
58 # Priority should be set correctly in the xref_config.ini file to use
59 # first any names coming from the official naming source
61 # Set this as the display_xref for the gene.
63 # 2) Foreach Transcript of that gene
65 # we assign a transcript extension (splice number?)
66 # This is just a counter starting at 201 which is incremented each time
67 # We add this to the name to get a "XXX_trans_name"xref where XXX is the
68 # type of source used to get the name. This is then added as an xref and
69 # is set to the display_xref for that transcript.
71 ##############################################################################################
74 ####################################
75 # Create OfficialNaming object
76 # Get some info from the BasicMapper
77 ####################################
79 my($class, $mapper) = @_;
83 $self->core($mapper->core);
84 $self->xref($mapper->xref);
85 $self->get_official_name($mapper->get_official_name);
90 ##################################################
91 # This will be the official database name
92 # HGNC, MGI, ZFIN_ID or PIGGY, comes from BasicMapper
93 #################################################
# Combined getter/setter for the official naming source of this species.
#
# Arg [1] : (optional) string - source name to store, e.g. HGNC, MGI,
#           ZFIN_ID or PIGGY
# Returns : the currently stored source name, or undef when none is set
#
# The assignment is guarded so that getter-style calls, which pass no
# argument, do not overwrite a previously stored name with undef.
sub get_official_name {
  my ($self, $arg) = @_;

  if (defined $arg) {
    $self->{_official_name} = $arg;
  }
  return $self->{_official_name};
}
104 ##################################################
105 # This is the main subroutine that does everything
106 ##################################################
109 my $species_id = shift;
111 my $dbname = $self->get_official_name();
112 my $dbi = $self->xref->dbc;
114 ###########################################################
115 # If no official name then we do not want to go any further
116 # Just set status to official_naming_done and return
117 ###########################################################
118 if(!defined($dbname)){
119 $self->update_process_status(
"official_naming_done");
122 $species_id = $self->get_id_from_species_name($self->core->species) unless defined $species_id;
123 $self->species_id($species_id);
126 ###########################################################
127 # If there are any official names on transcripts or translations
128 # move them onto gene level
130 # This is done for 2 reasons
131 # 1) to make the code the same as HGNC is on a gene
132 # and it makes it easier to find.
133 # 2) Later on these are copied to the canonical transcripts
134 # from the genes so move them now.
135 ###########################################################
137 if($dbname eq
"MGI"){ # Copy MGI to Genes
138 $self->biomart_fix(
"MGI",
"Translation",
"Gene");
139 $self->biomart_fix(
"MGI",
"Transcript",
"Gene");
141 if($dbname eq
"ZFIN_ID"){ # Copy ZFIN_ID to Genes
142 $self->biomart_fix(
"ZFIN_ID",
"Translation",
"Gene");
143 $self->biomart_fix(
"ZFIN_ID",
"Transcript",
"Gene");
145 if($dbname eq
"RGD"){ # Copy RGD to Genes
146 $self->biomart_fix(
"RGD",
"Translation",
"Gene");
147 $self->biomart_fix(
"RGD",
"Transcript",
"Gene");
152 ######################################################
153 # Get the current max values for xref and object_xref
154 ######################################################
155 my ($max_object_xref_id, $max_xref_id) = $self->find_max_ids($dbi);
157 my %display_label_to_desc;
158 $self->get_display_label_data(\%display_label_to_desc, $dbi);
161 $self->get_synonyms(\%synonym, $dbi);
164 # get the official naming external_sources
165 my $dbname_to_source_id = $self->get_new_dbname_sources($dbi); # reference to hash
167 ###########################
168 # Delete the old ones.
169 ###########################
170 $self->delete_old_data($dbname_to_source_id, $dbi);
172 $self->reset_display_xrefs($dbi);
175 my $ga = $db->get_GeneAdaptor();
177 my %gene_to_transcripts;
178 my %gene_id_to_stable_id;
179 my %tran_id_to_stable_id;
182 SELECT gtt.gene_id, gtt.transcript_id, gsi.stable_id, tsi.stable_id
183 FROM gene_transcript_translation gtt, gene_stable_id gsi, transcript_stable_id tsi
184 WHERE gtt.gene_id = gsi.internal_id AND
185 gtt.transcript_id = tsi.internal_id
186 ORDER BY gsi.stable_id, tsi.stable_id
189 my $sth = $dbi->prepare($sql);
192 my ($gene_id, $tran_id, $gsi, $tsi);
193 $sth->bind_columns(\$gene_id, \$tran_id, \$gsi, \$tsi);
196 if(!defined($gene_to_transcripts{$gene_id})){
197 push @sorted_gene_ids, $gene_id;
199 push @{$gene_to_transcripts{$gene_id}}, $tran_id;
200 $gene_id_to_stable_id{$gene_id} = $gsi;
201 $tran_id_to_stable_id{$tran_id} = $tsi;
204 my $dbentrie_sth = $self->get_dbentrie_sth($dbi);
205 my $ins_xref_sth = $self->get_ins_xref_sth($dbi);
206 my $ins_dep_ix_sth = $self->get_ins_dep_ix_sth($dbi);
207 my $ins_object_xref_sth = $self->get_ins_object_xref_sth($dbi);
208 my $set_gene_display_xref_sth = $self->get_set_gene_display_xref_sth($dbi);
210 my %xref_added; # store those added $xref_added{$accession:$source_id} = $xref_id;
213 my %official_name_used;
215 my $ignore_sql =<<IEG;
216 SELECT DISTINCT ox.object_xref_id
217 FROM object_xref ox, dependent_xref dx,
218 xref xmas, xref xdep,
219 source smas, source sdep
220 WHERE ox.xref_id = dx.dependent_xref_id AND
221 dx.dependent_xref_id = xdep.xref_id AND
222 dx.master_xref_id = xmas.xref_id AND
223 xmas.source_id = smas.source_id AND
224 xdep.source_id = sdep.source_id AND
225 smas.name like
"Refseq%predicted" AND
226 sdep.name like
"EntrezGene" AND
227 ox.ox_status =
"DUMP_OUT"
231 my $ignore_sth = $dbi->prepare($ignore_sql);
232 $ignore_sth->execute();
233 my ($ignore_object_xref_id);
234 $ignore_sth->bind_columns(\$ignore_object_xref_id);
235 while($ignore_sth->fetch()){
236 $ignore_object{$ignore_object_xref_id} = 1;
240 while ( my $gene_id = shift @sorted_gene_ids){
242 my $tran_source = $dbname;
244 # symbols to set when found.
245 my $gene_symbol = undef;
246 my $gene_symbol_xref_id = undef;
249 ################################
250 # Get official name if it has one
251 ################################
252 ($gene_symbol, $gene_symbol_xref_id) =
253 $self->get_official_domain_name({gene_id => $gene_id,
254 gene_to_tran => \%gene_to_transcripts,
255 gene_id_to_stable_id => \%gene_id_to_stable_id,
256 official_name_used => \%official_name_used,
260 if (defined($gene_symbol_xref_id)) {
261 $official_name_used{$gene_symbol_xref_id} = 1;
264 ############################################
265 # If not found see if there is an LRG entry
266 ############################################
267 if(!defined($gene_symbol)){ # look
for LRG
268 ($gene_symbol, $gene_symbol_xref_id, $is_lrg) = $self->find_lrg_hgnc($gene_id, $dbi);
271 ####################################################
272 # If not found look for other valid database sources
273 # These are RFAM and miRBase, as well as EntrezGene
274 ####################################################
275 if(!defined($gene_symbol)){
276 ($gene_symbol, $gene_symbol_xref_id) =
277 $self->find_from_other_sources(\%ignore_object,
278 {gene_id => $gene_id,
279 label_to_desc => \%display_label_to_desc,
281 tran_source => \$tran_source});
284 if(defined($gene_symbol)){
285 my $desc = $display_label_to_desc{$gene_symbol};
286 $set_gene_display_xref_sth->execute($gene_symbol_xref_id, $gene_id);
289 $self->set_transcript_display_xrefs({ max_xref => \$max_xref_id,
290 max_object => \$max_object_xref_id,
292 gene_id_to_stable_id => \%gene_id_to_stable_id,
293 gene_symbol => $gene_symbol,
296 source_id => $dbname_to_source_id->{$tran_source.
"_trans_name"},
297 xref_added => \%xref_added,
298 seen_gene => \%seen_gene,
299 gene_to_tran => \%gene_to_transcripts,
300 tran_source => $tran_source,
307 $self->update_process_status(
'official_naming_done');
314 ####################################################################
315 # Get official name if it has one
317 # Search gene for dbname entries.
318 # dbname (HGNC||MGI||ZFIN_ID|RGD) dependent on species
320 # Find the "best" one
321 # Remove the lesser ones (set status to MULTI_DELETE for object_xref)
323 # return the gene_symbol and xref_id of the best one
324 ######################################################################
# Pick the official-source gene symbol for one gene.
#
# Arg [1] : hashref with the keys read below:
#             gene_id              - internal gene id to name
#             gene_id_to_stable_id - hashref, used only for log messages
#             gene_to_tran         - hashref of gene id => transcript ids
#             official_name_used   - hashref keyed by xref_id of names that
#                                    have already been given to a gene
#             dbi                  - handle used to prepare the statements
# Returns : ($gene_symbol, $gene_symbol_xref_id); both stay undef when
#           nothing was selected.
#
# Outline, as far as it is visible in this view:
#   1. Fetch the xrefs of the official source attached to the gene and
#      record them in @list / @list_ox / %xref_id_to_display, tracking the
#      numerically lowest priority value seen as $best_level.
#   2. If several rows were seen but only one is kept in %ODN, the others
#      are handed to set_the_best_odns().
#   3. If exactly one candidate is kept, it is returned directly.
#   4. With more than one candidate, the best-identity statement is used to
#      try to narrow the set, then a name not yet present in
#      $official_name_used is preferred, otherwise one is "chosen as first".
# NOTE(review): the statements that maintain %ODN, $count, %best_ids and
# $xref_not_used are not visible in this view - confirm against the full file.
326 sub get_official_domain_name{
327 my ($self, $arg_ref) = @_;
329 my $gene_id = $arg_ref->{gene_id};
330 my $gene_id_to_stable_id = $arg_ref->{gene_id_to_stable_id};
331 my $gene_to_transcripts = $arg_ref->{gene_to_tran};
332 my $official_name_used = $arg_ref->{official_name_used};
333 my $dbi = $arg_ref->{dbi};
336 my $dbname = $self->get_official_name();
337 my $gene_symbol = undef;
338 my $gene_symbol_xref_id = undef;
341 my $dbentrie_sth = $self->get_dbentrie_sth($dbi);
344 my %xref_id_to_display;
346 $dbentrie_sth->execute($dbname, $gene_id,
"Gene");
347 my ($display, $xref_id, $object_xref_id, $level);
348 $dbentrie_sth->bind_columns(\$display, \$xref_id, \$object_xref_id, \$level);
355 while($dbentrie_sth->fetch){
357 push @list, $xref_id;
358 push @list_ox, $object_xref_id;
360 $xref_id_to_display{$xref_id} = $display;
# A numerically smaller priority value replaces the current best level.
361 if($level < $best_level){
364 $best_level = $level;
366 elsif($level == $best_level){
371 if(($count > 1) and (scalar(keys %ODN) == 1)){ # found one that is
"best" so set it and remove others
372 print
"For gene ".$gene_id_to_stable_id->{$gene_id}.
" we have mutiple ".$dbname.
"'s\n";
373 ($gene_symbol, $gene_symbol_xref_id) = $self->set_the_best_odns(\%ODN, \@list, \@list_ox, \%xref_id_to_display, $dbi);
374 if(defined($gene_symbol)){
375 return $gene_symbol, $gene_symbol_xref_id;
379 if(scalar(keys %ODN) == 1){ # one hgnc to
this gene - perfect case :-)
380 return $xref_id_to_display{(keys %ODN)[0]}, (keys %ODN)[0];
382 if(scalar(keys %ODN) > 1){
384 #if we have more than 1 xref, fail xrefs with worse % identity if we can (query or target identity whichever is greater)
385 my $identity_sth = $self->get_best_identity_sth($dbi);
386 $identity_sth->execute($dbname, $gene_id,
"Gene");
387 my ($xref_id, $best_identity);
388 $identity_sth->bind_columns(\$xref_id, \$best_identity);
389 my $temp_best_identity = 0;
392 while($identity_sth->fetch){
394 if($best_identity > $temp_best_identity){
396 $best_ids{$xref_id} = 1;
397 $temp_best_identity = $best_identity;
399 elsif($best_identity == $temp_best_identity){
400 $best_ids{$xref_id} = 1;
408 foreach my $xref_id (keys %ODN){
409 $best_list{$xref_id_to_display{$xref_id}} = 1;
412 # check if we were able to reduce the number of xrefs based on % identity
413 if ( scalar(keys %best_ids) > 0 && scalar(keys %best_ids) < scalar(keys %ODN) ) {
415 print
"For gene ".$gene_id_to_stable_id->{$gene_id}.
" we have mutiple ".$dbname.
"'s\n";
416 #set statuses for xrefs with worse % identity to MULTI_DELETE
417 ($gene_symbol, $gene_symbol_xref_id) = $self->set_the_best_odns(\%ODN, \@list, \@list_ox, \%xref_id_to_display, $dbi);
# NOTE(review): the closing parenthesis of scalar() encloses the "== 1"
# comparison here; "scalar(keys %ODN) == 1" was probably intended - confirm.
418 if( defined($gene_symbol) && scalar(keys %ODN == 1) ){
419 return $gene_symbol, $gene_symbol_xref_id;
423 # take the name which hasn't been already assigned to another gene, if possible
426 foreach my $x (keys %ODN){
427 if (!defined($official_name_used->{$x}) ) {
431 if ($xref_not_used) {
432 foreach my $x (keys %ODN){
433 print
"\t".$xref_id_to_display{$x};
434 if ($x == $xref_not_used) {
436 $gene_symbol = $xref_id_to_display{$x};
437 $gene_symbol_xref_id = $x;
439 print
" (left as $dbname reference but not gene symbol)\n";
446 foreach my $x (keys %ODN){
447 print
"\t".$xref_id_to_display{$x};
449 print
" (chosen as first)\n";
450 $gene_symbol = $xref_id_to_display{$x};
451 $gene_symbol_xref_id = $x;
454 print
" (left as $dbname reference but not gene symbol)\n";
462 return ($gene_symbol, $gene_symbol_xref_id);
466 ###########################################################
467 # Set the transcript display xrefs
469 # Use the gene symbol to create a transcript display xref
470 # Add 201 and increment.
471 ###########################################################
# Create and assign "<gene symbol>-<ext>" display xrefs for every
# transcript of one gene.
#
# Arg [1] : hashref with the keys read below:
#             max_xref, max_object - scalar REFERENCES to the running
#                                    xref_id / object_xref_id counters
#                                    (they are dereferenced with $$ and the
#                                    object counter is incremented here)
#             gene_id, gene_symbol, desc, source_id, tran_source, dbi
#             xref_added   - hashref cache keyed "<label>:<source_id>"
#             seen_gene    - hashref of gene symbol => last extension used
#             gene_to_tran - hashref of gene id => arrayref of transcript ids
#             gene_id_to_stable_id - hashref used for the LRG check
# Returns : nothing useful; returns early for genes whose stable id
#           matches /LRG/.
# Raises  : croaks when no source_id was supplied for $tran_source.
#
# NOTE(review): the initial value of $ext and the statements that advance
# $ext and $$max_xref_id are not visible in this view; the file header
# describes the extension as a counter starting at 201 - confirm.
472 sub set_transcript_display_xrefs{
473 my ($self, $arg_ref) = @_;
475 my $max_xref_id = $arg_ref->{max_xref};
476 my $max_object_xref_id = $arg_ref->{max_object};
477 my $gene_id = $arg_ref->{gene_id};
478 my $gene_symbol = $arg_ref->{gene_symbol};
479 my $desc = $arg_ref->{desc};
480 my $source_id = $arg_ref->{source_id};
481 my $xref_added = $arg_ref->{xref_added};
482 my $seen_gene = $arg_ref->{seen_gene};
483 my $gene_to_transcripts = $arg_ref->{gene_to_tran};
484 my $tran_source = $arg_ref->{tran_source};
485 my $gene_id_to_stable_id = $arg_ref->{gene_id_to_stable_id};
486 my $dbi = $arg_ref->{dbi};
489 # statement handles needed
490 my $ins_xref_sth = $self->get_ins_xref_sth($dbi);
491 my $ins_dep_ix_sth = $self->get_ins_dep_ix_sth($dbi);
492 my $set_tran_display_xref_sth = $self->get_set_transcript_display_xref_sth($dbi);
493 my $ins_object_xref_sth = $self->get_ins_object_xref_sth($dbi);
# Genes with an LRG stable id get no transcript names from this routine.
495 if ($gene_id_to_stable_id->{$gene_id} =~ /LRG/) {
return; }
# Continue numbering from the extension last stored for this symbol.
498 if(defined($seen_gene->{$gene_symbol})){
499 $ext = $seen_gene->{$gene_symbol};
502 foreach my $tran_id ( @{$gene_to_transcripts->{$gene_id}} ){
503 my $id = $gene_symbol.
"-".$ext;
504 if(!defined($source_id)){
505 croak
"id = $id\n but NO source_id for this entry for $tran_source???\n";
# Insert the xref only once per label/source pair; later uses reuse the
# cached xref_id.
507 if(!defined($xref_added->{$id.
":".$source_id})){
509 $ins_xref_sth->execute($$max_xref_id, $source_id, $id, $id,
"", $desc);
510 $xref_added->{$id.
":".$source_id} = $$max_xref_id;
512 $set_tran_display_xref_sth->execute($xref_added->{$id.
":".$source_id}, $tran_id);
513 $$max_object_xref_id++;
514 $ins_object_xref_sth->execute($$max_object_xref_id, $tran_id,
'Transcript', $xref_added->{$id.
":".$source_id},undef);
# The new object_xref gets an identity_xref row with 100/100 identity.
515 $ins_dep_ix_sth->execute($$max_object_xref_id, 100, 100);
# Remember the extension so another gene with the same symbol carries on.
518 $seen_gene->{$gene_symbol} = $ext;
523 #################################################
524 # Get statement handle to retrieve what xrefs
525 # are attached to a specific ensembl_id and type
526 # for a particular source name
527 #################################################
# Prepare the lookup of xrefs attached to one Ensembl object.
#
# The query joins xref, object_xref and source and selects, in this order:
# label, xref_id, object_xref_id and the source priority, restricted to
# object_xrefs whose ox_status is 'DUMP_OUT' and to the bound ensembl_id
# and ensembl_object_type.
# Callers in this file execute the handle with
# (source name, ensembl id, object type).
# NOTE(review): the argument unpacking, the source-name predicate and the
# return statement are not visible in this view - confirm.
528 sub get_dbentrie_sth{
534 SELECT x.label, x.xref_id, ox.object_xref_id, s.priority
535 FROM xref x, object_xref ox, source s
536 WHERE x.xref_id = ox.xref_id AND
537 x.source_id = s.source_id AND
539 ox.ox_status =
'DUMP_OUT' AND
540 ox.ensembl_id = ? AND
541 ox.ensembl_object_type = ?
543 my $sth = $dbi->prepare($sql);
547 #################################################
548 # Get statement handle to retrieve what xrefs
549 # are attached to a specific ensembl_id and type
550 # for a particular source name with description
551 #################################################
# Prepare the same lookup as get_dbentrie_sth with the xref description
# added as a fifth selected column.
#
# Selected columns, in order: label, xref_id, object_xref_id, source
# priority, description. Only object_xrefs with ox_status 'DUMP_OUT' for
# the bound ensembl_id and ensembl_object_type are returned.
# find_from_other_sources executes the handle with
# (source name, gene id, "Gene").
# NOTE(review): the argument unpacking, the source-name predicate and the
# return statement are not visible in this view - confirm.
552 sub get_dbentrie_with_desc_sth{
558 SELECT x.label, x.xref_id, ox.object_xref_id, s.priority, x.description
559 FROM xref x, object_xref ox, source s
560 WHERE x.xref_id = ox.xref_id AND
561 x.source_id = s.source_id AND
563 ox.ox_status =
'DUMP_OUT' AND
564 ox.ensembl_id = ? AND
565 ox.ensembl_object_type = ?
567 my $sth = $dbi->prepare($sql);
571 #################################################
572 # Get statement handle to retrieve the greater of query
573 # and target identity for each xref
574 #################################################
# Prepare the query that ranks a gene's xrefs by alignment identity.
#
# For every matching row the query returns xref_id and "best_identity",
# which is the greater of query_identity and target_identity (not an
# average). Rows are ordered by best_identity, highest first.
# Placeholders, in order: source name, ensembl_id, ensembl_object_type.
# Only object_xrefs with ox_status 'DUMP_OUT' are considered.
# NOTE(review): the argument unpacking and the return statement are not
# visible in this view - confirm.
575 sub get_best_identity_sth{
580 SELECT x.xref_id, CASE WHEN ix.query_identity >= ix.target_identity
581 THEN ix.query_identity ELSE ix.target_identity END as best_identity
582 FROM xref x, object_xref ox, identity_xref ix, source s
583 WHERE x.xref_id = ox.xref_id AND x.source_id = s.source_id
584 AND ox.object_xref_id = ix.object_xref_id AND s.name = ?
585 AND ox.ox_status =
'DUMP_OUT' AND ox.ensembl_id = ?
586 AND ox.ensembl_object_type = ? order by best_identity DESC
588 my $sth = $dbi->prepare($sql);
593 #################################################
594 # Get statement handle to set the display xref
595 # for a transcript in the xref database.
596 # Stored in the transcript_stable_id table.
597 #################################################
# Prepare the UPDATE that sets display_xref_id on one transcript_stable_id
# row. Placeholders, in order: display xref id, transcript internal_id.
# NOTE(review): the argument unpacking and the return statement are not
# visible in this view; callers invoke it as $self->...($dbi) - confirm.
598 sub get_set_transcript_display_xref_sth {
601 my $sth = $dbi->prepare(
'UPDATE transcript_stable_id SET display_xref_id =? where internal_id = ?');
606 #################################################
607 # Get statement handle to set the display xref
608 # for a gene in the xref database.
609 # Stored in the gene_stable_id table.
610 #################################################
# Prepare the UPDATE that sets display_xref_id on one gene_stable_id row.
# Placeholders, in order: display xref id, gene internal_id.
# NOTE(review): the argument unpacking and the return statement are not
# visible in this view; callers invoke it as $self->...($dbi) - confirm.
611 sub get_set_gene_display_xref_sth {
614 my $sth = $dbi->prepare(
'UPDATE gene_stable_id SET display_xref_id =? where internal_id = ?');
619 ###############################################
620 # Get statement handle to insert an xref
621 ###############################################
# Prepare the "insert ignore" statement that adds a row to xref.
#
# Placeholders, in order: xref_id, source_id, accession, label, info_text,
# description. version is fixed to 0 and info_type to 'MISC'.
# The species id is concatenated into the SQL text from $self->species_id
# when the statement is built, rather than being bound as a parameter.
# NOTE(review): the argument unpacking and the return statement are not
# visible in this view; callers invoke it as $self->...($dbi) - confirm.
622 sub get_ins_xref_sth{
626 my $sql =
"insert ignore into xref (xref_id, source_id, accession, label, version, species_id, info_type, info_text, description) values (?, ?, ?, ?, 0, ".$self->species_id.
", 'MISC', ?, ? )";
627 my $sth = $dbi->prepare($sql);
632 #################################################
633 # Get statement handle to insert an identity xref
634 #################################################
# Prepare the INSERT that adds a row to identity_xref.
# Placeholders, in order: object_xref_id, query_identity, target_identity.
# NOTE(review): the argument unpacking and the return statement are not
# visible in this view; callers invoke it as $self->...($dbi) - confirm.
635 sub get_ins_dep_ix_sth{
639 my $sql =
"insert into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)";
640 my $sth = $dbi->prepare($sql);
644 ###############################################
645 # Get statement handle to insert an object_xref
646 ###############################################
# Prepare the INSERT that adds a row to object_xref.
# Placeholders, in order: object_xref_id, ensembl_id, ensembl_object_type,
# xref_id, unused_priority. linkage_type is fixed to 'MISC' and ox_status
# to 'DUMP_OUT'.
# NOTE(review): the argument unpacking and the return statement are not
# visible in this view; callers invoke it as $self->...($dbi) - confirm.
647 sub get_ins_object_xref_sth{
651 my $sql =
"insert into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id, linkage_type, ox_status, unused_priority) values (?, ?, ?, ?, 'MISC', 'DUMP_OUT', ?)";
652 my $sth = $dbi->prepare($sql);
662 my ($max_object_xref_id, $max_object_xref_id2, $max_xref_id);
664 my $sth = $dbi->prepare(
"SELECT MAX(object_xref_id) FROM object_xref");
666 $sth->bind_columns(\$max_object_xref_id);
669 $sth = $dbi->prepare(
"SELECT MAX(object_xref_id) FROM identity_xref");
671 $sth->bind_columns(\$max_object_xref_id2);
676 $sth = $dbi->prepare(
"SELECT MAX(xref_id) FROM xref");
678 $sth->bind_columns(\$max_xref_id);
681 print
"MAX xref_id = $max_xref_id MAX object_xref_id = $max_object_xref_id, max_object_xref from identity_xref = $max_object_xref_id2\n";
682 return $max_object_xref_id, $max_xref_id;
686 my ($self, $synonym, $dbi) = @_;
688 my $dbname = $self->get_official_name();
690 my $syn_sql = (<<
"SYN");
691 SELECT es.synonym, x.label
692 FROM synonym es, xref x, source s
693 WHERE x.xref_id = es.xref_id AND
694 x.source_id = s.source_id AND
698 my $sth = $dbi->prepare($syn_sql);
701 $sth->bind_columns(\$syn,\$name);
703 $synonym->{$syn} = $name;
# Fill a caller-supplied hash with descriptions for the official source.
#
# Arg [1] : hashref to fill; keys are display strings, values descriptions
# Arg [2] : database handle used to prepare both queries
#
# Two queries are run against sources whose name is LIKE the official
# name (interpolated into the SQL text):
#   1. synonym joined to xref - each synonym is stored as a key with the
#      xref description as value;
#   2. xref alone - each xref label is stored as a key with its
#      description as value.
# The accession column is fetched by both queries but is only referenced
# by commented-out code.
# NOTE(review): the execute calls, heredoc terminators and the condition
# guarding the "undef desc" warning are not visible in this view - confirm.
709 sub get_display_label_data{
710 # my ($self, $label_to_id, $label_to_desc) = @_;
711 my ($self, $label_to_desc, $dbi) = @_;
713 my $dbname = $self->get_official_name();
715 my $gd1_sql = (<<
"GD1");
716 SELECT x.accession, sy.synonym, x.description
717 FROM synonym sy, xref x, source so
718 WHERE x.xref_id = sy.xref_id AND
719 so.source_id = x.source_id AND
720 so.name like
'$dbname'
723 my $gd1_sth = $dbi->prepare($gd1_sql);
726 my ($display_label, $acc, $syn, $desc);
727 $gd1_sth->bind_columns(\$acc,\$display_label, \$desc);
728 while($gd1_sth->fetch){
729 # $label_to_id->{$display_label} = $acc;
730 $label_to_desc->{$display_label} = $desc;
736 # get label to id from xref database to start with.
737 my $gd2_sql = (<<
"GD2");
738 SELECT x.accession, x.label, x.description
739 FROM xref x, source s
740 WHERE s.source_id = x.source_id AND
741 s.name like
'$dbname'
744 my $gd2_sth = $dbi->prepare($gd2_sql);
747 $gd2_sth->bind_columns(\$acc,\$display_label, \$desc);
748 while($gd2_sth->fetch){
749 # $label_to_id->{$display_label} = $acc;
751 warn
"undef desc for $display_label\n";
754 $label_to_desc->{$display_label} = $desc;
# Return the hash reference stored under $self->{'_other_name'}, storing
# a reference to %hash there first when the slot is still undefined.
# find_from_other_sources uses the returned hash to count how often each
# symbol has been handed out.
# NOTE(review): the declarations of $self and %hash are not visible in
# this view - confirm.
761 sub get_other_name_hash{
764 if(!defined($self->{
'_other_name'})){
766 $self->{
'_other_name'} = \%hash;
768 return $self->{
'_other_name'};
# Fallback naming: look for a gene symbol in miRBase, RFAM and EntrezGene,
# queried in that order.
#
# Arg [1] : hashref keyed by object_xref_id of links to ignore
# Arg [2] : hashref with the keys read below:
#             tran_source   - scalar REFERENCE; set to the name of the
#                             source that supplied the symbol
#             gene_id       - internal gene id
#             label_to_desc - hashref; receives label => description
#             dbi           - handle used to prepare the lookup
# Returns : ($gene_symbol, $gene_symbol_xref_id); undef when nothing found.
#
# Each accepted symbol is counted in the hash from get_other_name_hash();
# for miRBase and RFAM the running count is appended as ".<n>".
# NOTE(review): the bodies of the three guards inside the fetch loop
# (gene already found, label starting with LOC or SSC, ignored
# object_xref) are not visible in this view; presumably they skip the
# row - confirm.
774 sub find_from_other_sources{
775 my ($self, $ignore_object, $ref_args) = @_;
776 my $tran_source = $ref_args->{tran_source};
777 my $gene_id = $ref_args->{gene_id};
778 my $display_label_to_desc = $ref_args->{label_to_desc};
779 my $dbi = $ref_args->{dbi};
# NOTE(review): this copies the whole ignore hash on every call although it
# is only read below; using the reference directly would avoid the copy.
780 my %ignore_object = %{$ignore_object};
782 my ($gene_symbol, $gene_symbol_xref_id);
783 my $dbentrie_sth = $self->get_dbentrie_with_desc_sth($dbi);
784 my $other_name_num = $self->get_other_name_hash();
786 my ($display, $xref_id, $object_xref_id, $level, $desc);
788 foreach my $ext_db_name (qw(miRBase RFAM EntrezGene)){
789 $dbentrie_sth->execute($ext_db_name, $gene_id,
"Gene");
790 $dbentrie_sth->bind_columns(\$display, \$xref_id, \$object_xref_id, \$level, \$desc);
791 while($dbentrie_sth->fetch){
792 if (defined $found_gene{$gene_id}) {
795 if ($display =~ /^LOC/ || $display =~ /^SSC/) {
798 if (defined $ignore_object{$object_xref_id}) {
801 $gene_symbol = $display;
802 $gene_symbol_xref_id = $xref_id;
803 $$tran_source = $ext_db_name;
804 $display_label_to_desc->{$display} = $desc;
805 if(defined($other_name_num->{$gene_symbol})){
806 $other_name_num->{$gene_symbol}++;
809 $other_name_num->{$gene_symbol} = 1;
811 if ($ext_db_name eq
'miRBase' || $ext_db_name eq
'RFAM') {
812 $gene_symbol .=
".".$other_name_num->{$gene_symbol};
814 $found_gene{$gene_id} = 1;
818 return ($gene_symbol, $gene_symbol_xref_id);
823 # We do not delete this but set the status to "MULTI_DELETE"
# Prepare the statement that retires a superseded official-name link.
#
# The link is not deleted: the object_xref row named by the single
# placeholder (object_xref_id) has its ox_status set to "MULTI_DELETE".
#
# Arg [1] : database handle providing prepare()
# Returns : the prepared statement handle
sub get_delete_odn_sth {
  my ($self, $dbi) = @_;

  my $sth = $dbi->prepare('UPDATE object_xref SET ox_status = "MULTI_DELETE" where object_xref_id = ?');
  return $sth;
}
# Keep the selected official names of a gene and retire all others.
#
# Arg [1] : hashref of xref ids to keep
# Arg [2] : arrayref of all candidate xref ids
# Arg [3] : arrayref of object_xref ids, parallel to Arg [2]
# Arg [4] : hashref of xref id => display label
# Arg [5] : database handle
# Returns : ($gene_symbol, $gene_symbol_xref_id) of a kept entry, or undef
#           values when none of the candidates is kept.
# Raises  : croaks when the status update of a rejected entry fails.
#
# Candidates missing from the keep set have their object_xref status set
# to MULTI_DELETE; a kept candidate is stored as the gene symbol.
# NOTE(review): the declarations of %ODN and $i and the increment of $i
# are not visible in this view; %ODN is presumably built from Arg [1] -
# confirm.
833 sub set_the_best_odns{
834 my ($self, $odn, $ref_list, $ref_list_ox, $ref_xref_id_to_display, $dbi) = @_;
836 my $delete_odn_sth = $self->get_delete_odn_sth($dbi);
839 my $gene_symbol = undef;
840 my $gene_symbol_xref_id = undef;
842 while ($i < scalar(@{$ref_list})){
843 my $x = $ref_list->[$i];
844 if(!exists($ODN{$x})){
845 print
"\tremoving ".$ref_xref_id_to_display->{$x}.
" from gene\n";
846 #remove object xref....
847 $delete_odn_sth->execute($ref_list_ox->[$i])||
848 croak
"Could not set staus to MULTI_DELETE for object_xref ".$ref_list_ox->[$i].
"\n";
851 print
"\tKeeping the best one ".$ref_xref_id_to_display->{$x}.
"\n";
852 $gene_symbol = $ref_xref_id_to_display->{$x};
853 $gene_symbol_xref_id = $x;
857 return ($gene_symbol, $gene_symbol_xref_id);
860 ########################## START LRG BIT ######################################################
# Prepare the lookup of xrefs attached to one Ensembl object, used for the
# LRG search.
#
# Selected columns, in order: label, xref_id, object_xref_id, source
# priority. Unlike get_dbentrie_sth, no ox_status filter appears in the
# visible part of the query.
# The LRG lookup code executes the handle with
# ("LRG_HGNC_notransfer", gene id, "Gene").
# NOTE(review): the argument unpacking, the source-name predicate and the
# return statement are not visible in this view - confirm.
862 sub get_lrg_find_sth{
867 SELECT x.label, x.xref_id, ox.object_xref_id, s.priority
868 FROM xref x, object_xref ox, source s
869 WHERE x.xref_id = ox.xref_id AND
870 x.source_id = s.source_id AND
872 ox.ensembl_id = ? AND
873 ox.ensembl_object_type = ?
875 my $sth = $dbi->prepare($sql);
# Prepare the UPDATE that sets ox_status to 'NO_DISPLAY' for the
# object_xref row named by the single placeholder (object_xref_id).
# NOTE(review): the argument unpacking and the return statement are not
# visible in this view; callers invoke it as $self->...($dbi) - confirm.
880 sub get_lrg_set_status_sth{
884 my $sth = $dbi->prepare(
"update object_xref set ox_status = 'NO_DISPLAY' where object_xref_id = ?");
# Prepare the query that finds the xref standing in for an LRG entry.
#
# Selected columns, in order: xref_id, source priority. Only object_xrefs
# with ox_status 'DUMP_OUT' are considered.
# The LRG lookup code executes the handle with (display label, "HGNC").
# NOTE(review): the argument unpacking, the two predicates that consume the
# bound values and the return statement are not visible in this view -
# confirm.
888 sub get_lrg_to_hgnc_sth{
893 SELECT x.xref_id, s.priority
894 FROM xref x,source s, object_xref ox
895 WHERE x.xref_id = ox.xref_id AND
896 x.source_id = s.source_id AND
899 ox.ox_status =
'DUMP_OUT'
902 my $sth = $dbi->prepare($sql);
908 my ($self, $gene_id, $dbi) =@_;
910 my $gene_symbol_xref_id;
913 my $lrg_find_sth = $self->get_lrg_find_sth($dbi);
914 my $lrg_set_status_sth = $self->get_lrg_set_status_sth($dbi);
915 my $lrg_to_hgnc_sth = $self->get_lrg_to_hgnc_sth($dbi);
917 # look for LRG_HGNC_notransfer, if found then find HGNC equiv and set to this
918 # print "LRG FOUND with no HGNC, should have gotten this via the alt allele table?? gene_id = $gene_id\n";
919 $lrg_find_sth->execute(
"LRG_HGNC_notransfer", $gene_id,
"Gene");
920 my ($display, $xref_id, $object_xref_id, $level);
921 $lrg_find_sth->bind_columns(\$display, \$xref_id, \$object_xref_id, \$level);
922 while($lrg_find_sth->fetch){
923 $lrg_set_status_sth->execute($object_xref_id); # set oc_status to no _display as we
do not want
this transferred,
924 # just the equivalent hgnc
925 my $new_xref_id = undef;
927 $lrg_to_hgnc_sth->execute($display,
"HGNC");
928 $lrg_to_hgnc_sth->bind_columns(\$new_xref_id,\$pp);
929 $lrg_to_hgnc_sth->fetch;
930 if(defined($new_xref_id)){
931 $gene_symbol = $display;
932 $gene_symbol_xref_id = $new_xref_id;
936 return ($gene_symbol, $gene_symbol_xref_id, $is_lrg);
939 #############################END LRG BIT ################################################
942 # These are the ones added by official naming and hence
943 # Need to be removed in case they still exist from a previous run
# Map the names of the sources written by official naming to source ids.
#
# Every name in @list, plus "<official name>_trans_name", is looked up in
# the source table with a LIKE match.
# Returns : hashref of source name => source_id.
# NOTE(review): the argument unpacking, the start of the @list word list,
# the fetch call and the conditions around the two warnings are not
# visible in this view - confirm.
945 sub get_new_dbname_sources{
949 my %dbname_to_source_id;
951 my $dbname = $self->get_official_name();
954 Clone_based_ensembl_gene
955 Clone_based_ensembl_transcript
958 EntrezGene_trans_name);
960 push @list, $dbname.
"_trans_name";
963 my $sth = $dbi->prepare(
"select source_id from source where name like ?");
965 my $source_error = 0;
966 foreach my $source (@list){
968 $sth->execute($source);
969 $sth->bind_columns(\$id);
972 warn
"Could not find external database name $source\n";
976 $dbname_to_source_id{$source} = $id;
# NOTE(review): the message says "Therefore Exiting" but carp only warns and
# execution continues to the return below; croak may have been intended -
# confirm.
980 carp
"Could not find name for $source_error database name.\nTherefore Exiting.\nPlease add these sources";
982 return \%dbname_to_source_id;
986 my ($self, $dbname_to_source_id, $dbi) = @_;
988 my $dbname = $self->get_official_name();
991 Clone_based_ensembl_gene
992 Clone_based_ensembl_transcript
993 EntrezGene_trans_name
997 push @sources, $dbname.
"_trans_name";
999 my @source_ids =
map {$dbname_to_source_id->{$_}} @sources;
1000 my $list = join(
", ",@source_ids);
1003 print
"LIST to delete $list\n";
1008 FROM synonym s, xref x
1009 WHERE s.xref_id = x.xref_id AND
1010 x.source_id in ( $list );
1013 my $sth = $dbi->prepare($sql);
1017 my $del_identity_sql =(<<
"DE2");
1019 FROM object_xref o, xref x, identity_xref i
1020 WHERE i.object_xref_id = o.object_xref_id AND
1021 x.xref_id = o.xref_id AND
1022 x.source_id in ( $list )
1024 $sth = $dbi->prepare($del_identity_sql);
1027 my $del_ox_sql = (<<
"DE3");
1029 FROM object_xref o, xref x
1030 WHERE x.xref_id = o.xref_id AND
1031 x.source_id in ( $list )
1033 $sth = $dbi->prepare($del_ox_sql);
1036 my $del_x_sql =
"delete x from xref x where x.source_id in ( $list )";
1038 $sth = $dbi->prepare($del_x_sql);
# Prepare the statements that clear previously assigned display xrefs:
# display_xref_id is set to null on every transcript_stable_id row, and on
# every gene_stable_id row display_xref_id is set to null and desc_set to 0.
# NOTE(review): the first statement is prepared on $dbi, the second on
# $self->xref->dbc; elsewhere in this file $dbi is obtained from
# $self->xref->dbc, so these are presumably the same handle - confirm.
# NOTE(review): the argument unpacking and the execute calls are not
# visible in this view - confirm.
1044 sub reset_display_xrefs{
1048 my $sth = $dbi->prepare(
"update transcript_stable_id set display_xref_id = null");
1051 $sth = $self->xref->dbc->prepare(
"UPDATE gene_stable_id SET display_xref_id = null, desc_set =0");