3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefMapper::BasicMapper;
42 my($class, @args) = @_;
54 Example : $mapper->xref($new_xref);
55 Description: Getter / Setter for the xref.
56 info for the xref database.
63 my ($self, $arg) = @_;
66 ($self->{_xref} = $arg );
67 return $self->{_xref};
73 Example : $mapper->farm_queue(
"long");
74 Description: Getter / Setter
for the farm queue.
81 my ($self, $arg) = @_;
84 ($self->{_queue} = $arg );
85 return $self->{_queue};
91 Example : $mapper->exonerate(
"/usr/local/exonerate1.1.1");
92 Description: Getter / Setter
for the exonerate executable with full path.
99 my ($self, $arg) = @_;
102 ($self->{_exonerate} = $arg );
103 return $self->{_exonerate};
109 Example : $mapper->core($new_core);
110 Description: Getter / Setter
for the core.
111 info for the ensembl core database.
118 my ($self, $arg) = @_;
121 ($self->{_core} = $arg );
122 return $self->{_core};
128 Example : $mapper->previous_core($old_core);
129 Description: Getter / Setter
for the previous release of the core db.
136 my ($self, $arg) = @_;
139 ($self->{_previous_core} = $arg );
140 return $self->{_previous_core};
148 Example : $mapper->add_meta_pair(
"head_directory",
"/lustre/src/");
149 Description: Adds key value pairs to the database
157 my ($self, $key, $value) = @_;
159 my $sth = $self->xref->
dbc->prepare(
'insert into meta (meta_key, meta_value, date) values("'.$key.
'", "'.$value.
'", now())');
165 sub update_process_status{
166 my ($self, $value) = @_;
168 my $sth_stat = $self->xref->dbc->prepare(
"insert into process_status (status, date) values('".$value.
"',now())");
169 $sth_stat->execute();
174 sub xref_latest_status {
176 my $verbose = shift || 0;
178 my $sth = $self->xref->dbc->prepare(
"select id, status, date from process_status order by id");
181 my ($id, $status, $date);
182 $sth->bind_columns(\$id, \$status,\$date);
184 print
"$status\t$date\n" if($verbose and $self->verbose);
191 my ($self, $key) = @_;
193 my $sth = $self->xref->dbc->prepare(
'select meta_value from meta where meta_key like "'.$key.
'" order by meta_id');
197 $sth->bind_columns(\$value);
198 while($sth->fetch){ # get the last one
219 open my $fh,
"<", $file or croak (
"\nCannot open input file '$file':\n $!\n");
220 while( my $line = <$fh> ) {
223 next
if $line =~ /^#/;
226 my ($key, $value) = split(
"=",$line);
231 if($key eq
"species"){
233 $species_hash{
'species'} = $value;
235 elsif($key eq
"xref"){
238 elsif($key eq
"farm"){
241 elsif($type eq
"species"){ # processing species data
242 $species_hash{lc($key)} = $value;
244 elsif($type eq
"xref"){ # processing xref data
245 $xref_hash{lc($key)} = $value;
247 elsif($type eq
"farm"){
248 $farm_hash{lc($key)} = $value;
251 close $fh or croak
"Can't close file";
253 my $value = $species_hash{
'species'};
254 my $taxon = $species_hash{
'taxon'};
257 print STDERR
"\'$value\' is not a recognised species - please use full species name (e.g. homo_sapiens) in $file\n";
264 my $class =
"XrefMapper/$value.pm";
265 my $eval_test = eval {
268 if($@ or $eval_test != 1) {
269 if ($@ =~ /Can\
't locate $class/) {
270 if (defined $taxon) {
271 $class = "XrefMapper/$taxon.pm";
276 if ($@ =~ /Can\'t locate $class/) {
295 if ($use_basic or !defined $module) {
296 if(defined($verbose) and $verbose) {
297 my $warning_msg = "Did not find a specific mapping module XrefMapper::$value ";
298 if (defined $taxon) {
299 $warning_msg .= "or XrefMapper::$taxon ";
301 $warning_msg .= "- using XrefMapper::BasicMapper instead\n";
304 require XrefMapper::BasicMapper;
305 $module = "BasicMapper";
308 $mapper = "XrefMapper::$module"->new();
310 if(defined($farm_hash{'queue
'})){
311 $mapper->farm_queue($farm_hash{'queue
'});
313 if(defined($farm_hash{'exonerate
'})){
314 $mapper->exonerate($farm_hash{'exonerate
'});
318 if(defined($xref_hash{host}) ){
319 my ($host, $user, $dbname, $pass, $port);
320 $host = $xref_hash{'host
'};
321 $user = $xref_hash{'user
'};
322 $dbname = $xref_hash{'dbname
'};
323 if(defined($xref_hash{'password
'})){
324 $pass = $xref_hash{'password
'};
329 if(defined($xref_hash{'port
'})){
330 $port = $xref_hash{'port
'};
336 $xref = new XrefMapper::db(-host => $host,
343 $mapper->xref($xref);
344 $mapper->add_meta_pair("xref", $host.":".$dbname);
345 if(defined($xref_hash{'dir
'})){
346 $xref->dir($xref_hash{'dir
'});
347 if(!-d $xref_hash{'dir
'}){
348 croak "directory ".$xref_hash{'dir
'}." does not exist please create this\n";
352 croak "No directory specified for the xref fasta files\n";
357 croak "No host name given for xref database\n";
360 if(defined($species_hash{'species
'})){
362 my ($host, $port, $user, $dbname, $pass);
363 $host = $species_hash{'host
'};
364 $user = $species_hash{'user
'};
365 $dbname = $species_hash{'dbname
'};
366 if(defined($species_hash{'password
'})){
367 $pass = $species_hash{'password
'};
372 if(defined($species_hash{'port
'})){
373 $port = $species_hash{'port
'};
379 my $core = new XrefMapper::db(-host => $host,
386 $mapper->core($core);
388 $mapper->add_meta_pair("species", $host.":".$dbname);
390 if(defined($species_hash{'dir
'})){
391 $core->dir($species_hash{'dir
'});
392 if(!-d $species_hash{'dir
'}){
393 croak "directory ".$species_hash{'dir
'}." does not exist please create this\n";
397 croak "No directory specified for the ensembl fasta files\n";
400 $core->species($value);
402 #connect to previous release of core db if connection details specified in xref_input (pr_host, pr_port, pr_dbname, pr_user)
403 if (defined( $species_hash{'pr_host
'}) && defined( $species_hash{'pr_user
'}) && defined( $species_hash{'pr_dbname
'}) ) {
404 my ($pr_host, $pr_port, $pr_user, $pr_dbname);
405 $pr_host = $species_hash{'pr_host
'};
406 $pr_user = $species_hash{'pr_user
'};
407 $pr_dbname = $species_hash{'pr_dbname
'};
408 if(defined($species_hash{'pr_port
'})){
409 $pr_port = $species_hash{'pr_port
'};
412 my $previous_core = new XrefMapper::db(-host => $pr_host,
417 -dbname => $pr_dbname);
419 $mapper->previous_core($previous_core);
421 $mapper->add_meta_pair("species", $pr_host.":".$pr_dbname);
433 Example : $mapper->dumpcheck("yes");
434 Description: Getter / Setter for dumpcheck.
435 If set the mapper will not dump fasta files
436 if they exist already.
443 my ($self, $arg) = @_;
446 ($self->{_dumpcheck} = $arg );
447 return $self->{_dumpcheck};
451 my ($self, $arg) = @_;
454 ($self->{_nofarm} = $arg );
455 return $self->{_nofarm};
459 my ($self, $arg) = @_;
462 ($self->{_verbose} = $arg );
463 return $self->{_verbose};
467 my ($self, $arg) = @_;
470 ($self->{_species_id} = $arg );
471 return $self->{_species_id};
474 sub get_id_from_species_name {
475 my ($self, $species_name) = @_;
477 my $sql = "select species_id from species where name = '".$species_name."'";
478 my $sth = $self->xref->dbc->prepare($sql);
480 my @row = $sth->fetchrow_array();
483 $species_id = $row[0];
485 print STDERR "Couldn't get ID
for species
".$species_name."\n
";
486 print STDERR "It must be one of :-\n
";
487 $sql = "select name from species
";
488 $sth = $self->xref->dbc->prepare($sql);
490 while(my @row2 = $sth->fetchrow_array()){
491 print STDERR $row2[0]."\n
";
505 sub get_alt_alleles {
508 my $dba = $self->core->dba;
509 my $aaga = Bio::EnsEMBL::DBSQL::AltAlleleGroupAdaptor->new($dba);
511 my $aa_list = $aaga->fetch_all();
513 my $count = scalar(@$aa_list);
514 my %alt_id_to_gene_id;
515 my %gene_id_to_alt_id;
519 my $insert_sth = $self->xref->dbc->prepare("insert into alt_allele (alt_allele_id, gene_id, is_reference) values (?, ?,?)
");
522 $sth = $self->xref->dbc->prepare("delete from alt_allele
");
525 my $num_of_genes = 0;
527 # Iterate through all alt-allele groups, pushing unique alleles into the xref alt allele table.
528 # Track the reference gene IDs.
530 foreach my $aag (@$aa_list) {
531 my $ref_gene = $aag->rep_Gene_id();
532 # Representative gene not guaranteed, try to find an alternative best fit
534 my $genes = $aag->get_all_Genes;
535 foreach my $gene (@$genes) {
536 if ($gene->slice->is_reference) {
537 $ref_gene = $gene->dbID;
542 warn('Tried very hard but failed to select a representative gene for alt-allele-group '.$aag->dbID);
545 $is_reference{$ref_gene} = 1;
546 my $others = $aag->get_all_Gene_ids('no rep');
547 # Extra step in place to handle non-ref situations
548 my @cleaned_others = grep {!/$ref_gene/} @$others;
550 $insert_sth->execute($aag->dbID,$ref_gene,1);
553 foreach my $aa (@cleaned_others) {
554 $insert_sth->execute($aag->dbID,$aa,0);
558 if ($aag->dbID > $max_alt_id) { $max_alt_id = $aag->dbID }
561 print "$alt_added alleles found containing $num_of_genes genes\n
";
564 print "No alt_alleles found
for this species.\n
" ;
568 ### LRGs added as alt_alleles in the XREF system but never added to core.
571 # Use $max_alt_id for new ones.
575 SELECT ox.ensembl_id, g.gene_id
576 FROM xref x, object_xref ox, external_db e, gene g
577 WHERE x.xref_id = ox.xref_id AND
578 e.external_db_id = x.external_db_id AND
579 e.db_name like "Ens_Hs_gene
" AND
580 ox.ensembl_object_type = "Gene
" AND
581 x.display_label = g.stable_id
584 $sth = $self->core->dbc->prepare($sql);
585 my ($core_gene_id, $lrg_gene_id);
587 $sth->bind_columns(\$lrg_gene_id, \$core_gene_id);
595 # If the core gene is already in an alt_allele set then use that alt_id for the LRG gene only.
596 # Else use a new one and add both core and LRG.
600 while ($sth->fetch()){
601 my $aag = $aaga->fetch_by_gene_id($core_gene_id);
603 $insert_sth->execute($aag->dbID, $lrg_gene_id, 0);
606 $aag = $aaga->fetch_by_gene_id($lrg_gene_id);
608 $insert_sth->execute($aag->dbID, $lrg_gene_id, 1);
609 print "LRG perculiarity\t$core_gene_id\t$lrg_gene_id\n
";
613 $insert_sth->execute($max_alt_id, $lrg_gene_id, 0);
614 $insert_sth->execute($max_alt_id, $core_gene_id, 1);
623 print "Added $count alt_allels
for the lrgs. $old_count added to previous alt_alleles and $new_count
new ones\n
";
624 print "LRG problem count = $lrg_count\n
";
628 $self->update_process_status("alt_alleles_added
");
635 # Default behaviour is not to do the official naming.
636 # Overload this method in the species file, returning the
637 # official database name, to do so.
638 # (i.e. human -> HGNC, mouse -> MGI, zebrafish -> ZFIN_ID)
640 sub get_official_name {
647 # Biomart insists that a source is linked to only one ensembl
648 # object type (Gene, Transcript, Translation). So biomart_fix
649 # will move the $db_name entries from type1 to type2,
650 # i.e. move all HGNC from Transcripts to Genes.
653 my ($self, $db_name, $type1, $type2, $verbose, $xref_dbc) = @_;
654 $xref_dbc = $self->xref->dbc unless defined $xref_dbc;
656 print "$db_name is associated with both $type1 and $type2
object types\n
" if(defined($verbose));
657 print "$db_name moved to Gene level.\n
" if(!defined($verbose));
663 if($type1 eq "Gene
" or $type2 eq "Gene
"){
666 if($type1 eq "Translation
" or $type2 eq "Translation
"){
667 $from = "Translation
";
668 $from_id = "translation_id
"
671 $from = "Transcript
";
672 $from_id = "transcript_id
";
677 $to_id = "transcript_id
";
678 $from = "Translation
";
679 $from_id = "translation_id
";
682 if ($db_name eq 'GO' || $db_name eq 'goslim_goa') {
684 $from = 'Transcript';
685 $to_id = 'translation_id';
686 $from_id = 'transcript_id';
689 print "Therefore moving all associations from $from to
".$to."\n
" if(defined($verbose));
693 UPDATE IGNORE object_xref, gene_transcript_translation, xref, source
694 SET object_xref.ensembl_object_type = "$to
",
695 object_xref.ensembl_id = gene_transcript_translation.$to_id
696 WHERE object_xref.ensembl_object_type = "$from
" AND
697 object_xref.ensembl_id = gene_transcript_translation.$from_id AND
698 xref.xref_id = object_xref.xref_id AND
699 xref.source_id = source.source_id AND
700 object_xref.ox_status = "DUMP_OUT
" AND
701 source.name = "$db_name
";
703 my $result = $xref_dbc->do($sql) ;
705 if($db_name eq "GO
" || $db_name eq 'goslim_goa'){
707 DELETE object_xref, identity_xref
708 FROM object_xref, xref, source, identity_xref
709 WHERE object_xref.ensembl_object_type = "$from
" AND
710 identity_xref.object_xref_id = object_xref.object_xref_id AND
711 xref.xref_id = object_xref.xref_id AND
712 xref.source_id = source.source_id AND
713 object_xref.ox_status = "DUMP_OUT
" AND
714 source.name = "$db_name
";
717 $result = $xref_dbc->do($sql);
719 # Special tidying up for transcripts without translation
720 # The resulting object_xref does not have an ensembl_id to map to
723 DELETE object_xref, identity_xref
724 FROM object_xref, xref, source, identity_xref
725 WHERE object_xref.ensembl_object_type = "$to
" AND
726 identity_xref.object_xref_id = object_xref.object_xref_id AND
727 xref.xref_id = object_xref.xref_id AND
728 xref.source_id = source.source_id AND
729 object_xref.ensembl_id = 0 AND
730 object_xref.ox_status = "DUMP_OUT
" AND
731 source.name = "$db_name
";
736 DELETE object_xref, identity_xref
737 FROM xref, source, object_xref
738 LEFT JOIN identity_xref
739 ON identity_xref.object_xref_id = object_xref.object_xref_id
740 WHERE object_xref.ensembl_object_type = "$from
" AND
741 xref.xref_id = object_xref.xref_id AND
742 xref.source_id = source.source_id AND
743 object_xref.ox_status = "DUMP_OUT
" AND
744 source.name = "$db_name
";
747 $result = $xref_dbc->do($sql);
751 #delete dependent_xref
753 DELETE FROM dependent_xref WHERE object_xref_id NOT IN
754 (SELECT object_xref_id FROM object_xref);
761 # This sub finds which sources lie on multiple ensembl object types
762 # and calls biomart_fix to fix this.
767 my $sql = 'SELECT ox.ensembl_object_type, COUNT(*), s.name FROM xref x, object_xref ox, source s WHERE x.xref_id = ox.xref_id AND s.source_id = x.source_id and ox.ox_status = "DUMP_OUT
" GROUP BY s.name, ox.ensembl_object_type';
774 my $sth = $self->xref->dbc->prepare($sql);
776 my ($type, $count, $name);
777 my ($last_type, $last_count, $last_name);
778 $sth->bind_columns(\$type,\$count,\$name);
779 $last_name = "DEFAULT
";
780 while ((!$again) and $sth->fetch){
781 if($last_name eq $name){
783 $self->biomart_fix($name,$last_type, $type, 1);
787 $last_count = $count;
792 my $tester = XrefMapper::TestMappings->new($self);
793 if($tester->unlinked_entries){
794 croak "Problems found before source_defined_move\n
";
797 $self->update_process_status('biomart_test_finished');
802 # Similar to above but just reports the problems.
803 # It does not fix them
809 my $sql = 'SELECT ox.ensembl_object_type, COUNT(*), s.name FROM xref x, object_xref ox, source s WHERE x.xref_id = ox.xref_id AND s.source_id = x.source_id and ox.ox_status = "DUMP_OUT
" GROUP BY s.name, ox.ensembl_object_type';
812 my $sth = $self->xref->dbc->prepare($sql);
815 my ($type, $count, $name);
816 my ($last_type, $last_count, $last_name);
817 $sth->bind_columns(\$type,\$count,\$name);
818 $last_name = "NOTKNOWN
";
821 if($last_name eq $name){
823 print STDERR "\nProblem Biomart test fails\n
";
826 print STDERR "$last_name\t$last_count\t$last_type\n
";
827 print STDERR "$name\t$count\t$type\n
";
831 $last_count = $count;
837 # remove a list of patterns from a string
838 sub filter_by_regexp {
840 my ($self, $str, $regexps) = @_;
842 foreach my $regexp (@$regexps) {
843 $str =~ s/$regexp//ig;
851 sub get_species_id_from_species_name{
852 my ($self,$species) = @_;
855 my $sql = "select species_id from species where name =
'".$species."'";
856 my $sth = $self->dbc->prepare($sql);
858 my @row = $sth->fetchrow_array();
861 $species_id = $row[0];
863 print STDERR "Couldn
't get ID for species ".$species."\n";
864 print STDERR "It must be one of :-\n";
865 $sql = "select name from species";
866 $sth = $self->dbc->prepare($sql);
868 while(my @row2 = $sth->fetchrow_array()){
869 print STDERR $row2[0]."\n";
881 my $keep_core_data = shift;
883 # remove all object_xref, identity_xref entries
885 my $sql = "TRUNCATE table object_xref";
886 my $sth = $self->xref->dbc->prepare($sql);
889 $sql = "TRUNCATE table identity_xref";
890 $sth = $self->xref->dbc->prepare($sql);
893 # remove all xrefs after PARSED_xref_id
894 # set dumped to NULL for all xrefs.
896 my $max_xref_id = $self->get_meta_value("PARSED_xref_id");
899 $sql = "DELETE from xref where xref_id > $max_xref_id";
900 $sth = $self->xref->dbc->prepare($sql);
904 $sql = "UPDATE xref set dumped = null";
905 $sth = $self->xref->dbc->prepare($sql);
908 $sql = "DELETE from display_xref_priority";
909 $sth = $self->xref->dbc->prepare($sql);
913 $sql = "DELETE from gene_desc_priority";
914 $sth = $self->xref->dbc->prepare($sql);
918 if (!$keep_core_data) {
919 # remove all from core_info tables
920 # gene_transcript_translation
921 # [gene/transcript/translation]_stable_id
923 $sql = "DELETE from gene_transcript_translation";
924 $sth = $self->xref->dbc->prepare($sql);
927 $sql = "DELETE from gene_stable_id";
928 $sth = $self->xref->dbc->prepare($sql);
931 $sql = "DELETE from transcript_stable_id";
932 $sth = $self->xref->dbc->prepare($sql);
935 $sql = "DELETE from translation_stable_id";
936 $sth = $self->xref->dbc->prepare($sql);
942 sub remove_mapping_data{
945 my $sql = "DELETE from mapping_jobs";
946 my $sth = $self->xref->dbc->prepare($sql);
949 $sql = "DELETE from mapping";
950 $sth = $self->xref->dbc->prepare($sql);
953 $sql = "DELETE from alt_allele";
954 $sth = $self->xref->dbc->prepare($sql);
957 $sql = "DELETE from source_mapping_method";
958 $sth = $self->xref->dbc->prepare($sql);
965 sub revert_to_parsing_finished{
970 $self->remove_mapping_data();
972 $self->update_process_status('parsing_finished
');
978 sub revert_to_mapping_finished{
982 $self->clean_up(undef,1);
984 # set mapping jobs to SUBMITTED
985 my $sql = 'UPDATE mapping_jobs set status =
"SUBMITTED"';;
986 my $sth = $self->xref->dbc->prepare($sql);
989 $self->update_process_status('mapping_finished
');
994 # In case we have alt alleles with xrefs, these will be direct ones;
995 # we need to move all xrefs on to the reference.
998 sub get_alt_allele_hashes{
1004 my $sql = "select alt_allele_id, gene_id, is_reference from alt_allele order by alt_allele_id, is_reference DESC";
1006 my $sth = $self->xref->dbc->prepare($sql);
1008 my ($alt_allele_id,$gene_id, $is_ref);
1009 $sth->bind_columns(\$alt_allele_id, \$gene_id, \$is_ref);
1010 my $last_alt_allele = 0;
1012 while($sth->fetch()){
1013 if( $alt_allele_id != $last_alt_allele) {
1014 #use the first non-reference gene if there is no reference gene in an alt_allele
1015 $ref_gene = $gene_id;
1017 $alt_to_ref{$gene_id} = $ref_gene;
1018 push @{$ref_to_alts{$ref_gene}}, $gene_id;
1020 $last_alt_allele = $alt_allele_id;
1024 return \%alt_to_ref, \%ref_to_alts;
1028 sub process_alt_alleles{
1031 $dbc = $self->xref->dbc unless defined $dbc;
1033 # ALL are on the Gene level now. This may change but for now it is okay.
1034 my ($alt_to_ref, $ref_to_alts) = $self->get_alt_allele_hashes();
1036 my $tester = XrefMapper::TestMappings->new($self);
1038 # Move the xrefs on to the reference Gene.
1039 # NOTE: IGNORE is used as the xref might already be on this Gene and we do not want it to crash
1041 my $move_sql =(<<'MOVE
');
1042 UPDATE IGNORE object_xref ox, xref x, source s
1043 SET ox.ensembl_id = ?
1044 WHERE x.source_id = s.source_id AND
1045 ox.xref_id = x.xref_id AND
1046 ox.ensembl_id = ? AND
1047 ox.ensembl_object_type = 'Gene
' AND
1048 ox.ox_status = 'DUMP_OUT
' AND
1051 $move_sql .= "'".join("', '",$self->get_gene_specific_list()) . "')";
1053 print "MOVE SQL\n$move_sql\n";
1056 # Now where it was already on the Gene the ignore will have stopped the move
1057 # so we now want to just remove those ones as they already exist.
1059 my $del_ix_sql =(<<'DIX
');
1061 FROM identity_xref ix, object_xref ox, xref x, source s
1062 WHERE x.source_id = s.source_id AND
1063 ox.object_xref_id = ix.object_xref_id AND
1064 ox.xref_id = x.xref_id AND
1065 ox.ensembl_id = ? AND
1066 ox.ensembl_object_type = 'Gene
' AND
1067 ox.ox_status = 'DUMP_OUT
' AND
1070 $del_ix_sql .= "'".join("', '",$self->get_gene_specific_list()) . "')";
1072 my $del_sql =(<<'DEL
');
1074 FROM object_xref ox, xref x, source s
1075 WHERE x.source_id = s.source_id AND
1076 ox.xref_id = x.xref_id AND
1077 ox.ensembl_id = ? AND
1078 ox.ensembl_object_type = 'Gene
' AND
1079 ox.ox_status = 'DUMP_OUT
' AND
1082 $del_sql .= "'".join("', '",$self->get_gene_specific_list()) . "')";
1084 my $move_sth = $dbc->prepare($move_sql) || croak "$move_sql cannot be prepared";
1085 my $del_ix_sth = $dbc->prepare($del_ix_sql) || croak "$del_ix_sql cannot be prepared";
1086 my $del_sth = $dbc->prepare($del_sql) || croak "$del_sql cannot be prepared";
1089 my $del_ix_count = 0;
1090 my $del_ox_count = 0;
1091 foreach my $key (keys %$alt_to_ref){
1092 $move_sth->execute($alt_to_ref->{$key}, $key);
1093 $move_count += $move_sth->rows;
1095 $del_ix_sth->execute($key);
1096 $del_ix_count += $del_ix_sth->rows;
1098 $del_sth->execute($key);
1099 $del_ox_count += $del_sth->rows;
1103 $del_ix_sth->finish;
1105 print "Number of rows:- moved = $move_count, identitys deleted = $del_ix_count, object_xrefs deleted = $del_ox_count\n";
1107 # Now we have all the data on the reference Gene we want to copy all the data
1108 # onto the alt alleles.
1112 my $get_data_sql=(<<'GET
');
1113 SELECT ox.object_xref_id, ox.ensembl_object_type, ox.xref_id, ox.linkage_annotation,
1114 ox.linkage_type, ox.ox_status, ox.unused_priority, ox.master_xref_id,
1115 ix.query_identity, ix.target_identity, ix.hit_start, ix.hit_end,
1116 ix.translation_start, ix.translation_end, ix.cigar_line, ix.score, ix.evalue
1117 FROM xref x, source s, object_xref ox
1118 LEFT JOIN identity_xref ix ON ox.object_xref_id =ix.object_xref_id
1119 WHERE x.source_id = s.source_id AND
1120 ox.xref_id = x.xref_id AND
1121 ox.ensembl_id = ? AND
1122 ox.ox_status = 'DUMP_OUT
' AND
1123 ox.ensembl_object_type = 'Gene
' AND
1127 $get_data_sql .= "'".join("', '",$self->get_gene_specific_list()) . "')";
1129 my $get_data_sth = $self->xref->dbc->prepare($get_data_sql) || croak "Could not prepare $get_data_sql";
1133 my $insert_object_xref_sql =(<<'INO
');
1134 INSERT INTO object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id, linkage_annotation,
1135 linkage_type, ox_status, unused_priority, master_xref_id)
1136 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1139 my $insert_ox_sth = $self->xref->dbc->prepare($insert_object_xref_sql) || croak "Could not prepare $insert_object_xref_sql";
1142 my $insert_identity_xref_sql = (<<'INI
');
1143 INSERT INTO identity_xref (object_xref_id, query_identity, target_identity, hit_start, hit_end,
1144 translation_start, translation_end, cigar_line, score, evalue )
1145 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1148 my $insert_ix_sth = $self->xref->dbc->prepare($insert_identity_xref_sql) || croak "Could not prepare $insert_identity_xref_sql";
1152 my $max_object_xref_id;
1154 my $sth = $self->xref->dbc->prepare("SELECT MAX(object_xref_id) FROM object_xref");
1156 $sth->bind_columns(\$max_object_xref_id);
1158 if((!defined($max_object_xref_id)) or (!$max_object_xref_id)){
1159 croak "Problem getting max object_xref_id";
1161 $max_object_xref_id++;
1163 my $added_count = 0;
1165 foreach my $key (keys %$ref_to_alts){
1166 $get_data_sth->execute($key);
1167 my ($object_xref_id, $ensembl_object_type, $xref_id, $linkage_annotation,
1168 $linkage_type, $ox_status, $unused_priority, $master_xref_id,
1169 $query_identity, $target_identity, $hit_start, $hit_end,
1170 $translation_start, $translation_end, $cigar_line, $score, $evalue);
1172 $get_data_sth->bind_columns(\$object_xref_id, \$ensembl_object_type, \$xref_id, \$linkage_annotation,
1173 \$linkage_type, \$ox_status, \$unused_priority, \$master_xref_id,
1174 \$query_identity, \$target_identity, \$hit_start, \$hit_end,
1175 \$translation_start, \$translation_end, \$cigar_line, \$score, \$evalue);
1177 while( $get_data_sth->fetch()){
1178 foreach my $alt (@{$ref_to_alts->{$key}}){
1179 $max_object_xref_id++;
1180 $insert_ox_sth->execute($max_object_xref_id, $alt, $ensembl_object_type, $xref_id, $linkage_annotation,
1181 $linkage_type, $ox_status, $unused_priority, $master_xref_id) || croak "Could not insert object_xref data";
1183 #ONLY add identity xref if object_xref was added successfully.
1184 if( $insert_ox_sth->rows){
1186 $insert_ix_sth->execute($max_object_xref_id, $query_identity, $target_identity, $hit_start, $hit_end,
1187 $translation_start, $translation_end, $cigar_line, $score, $evalue) || croak "Could not insert identity_xref data";
1195 print "Added $added_count new mapping but ignored $ignored\n";
1197 if($tester->unlinked_entries){
1198 croak "Problems found after process_alt_alleles\n";
1201 $self->update_process_status('alt_alleles_processed
');
1207 # These sources should be on the gene, even if they are mapped transcript or translation.
1208 # We define which ones are to be moved here
1210 sub get_gene_specific_list {
1214 $dbi = $self->xref->dbc unless defined $dbi;
1216 my @list = qw(DBASS3 DBASS5 EntrezGene miRBase RFAM TRNASCAN_SE RNAMMER UniGene Uniprot_gn WikiGene MIM_GENE MIM_MORBID HGNC MGI ZFIN_ID FlyBaseName_gene RGD SGD_GENE VGNC wormbase_gseqname wormbase_locus Xenbase GeneCards);
1218 # Check the sources are used in the database considered
1219 my (@used_list, $sql, $sth, $count);
1220 foreach my $source (@list) {
1221 $sql = "SELECT COUNT(*) FROM xref x, source s WHERE s.source_id = x.source_id AND s.name = '$source
';";
1222 $sth = $dbi->prepare($sql);
1224 $sth->bind_columns(\$count);
1228 push @used_list, $source;
1238 # Here we do the moving.
1240 sub source_defined_move{
1244 foreach my $source ($self->get_gene_specific_list($dbi)){
1245 $self->biomart_fix($source,"Translation","Gene", undef, undef, $dbi);
1246 $self->biomart_fix($source,"Transcript","Gene", undef, undef, $dbi);
1248 my $tester = XrefMapper::TestMappings->new($self);
1249 if($tester->unlinked_entries){
1250 croak "Problems found after source_defined_move\n";
1252 $self->update_process_status('source_level_move_finished
');