3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefMapper::CoordinateMapper;
24 # This is a set of subroutines used for creating Xrefs based on
25 # coordinate overlaps.
34 use DBI qw( :sql_types );
38 use File::Spec::Functions;
43 our @EXPORT = qw( run_coordinatemapping );
45 our $coding_weight = 2;
48 our $transcript_score_threshold = 0.75;
53 my($class, $mapper) = @_;
57 $self->core($mapper->core);
58 $self->xref($mapper->xref);
59 $self->mapper($mapper);
60 $verbose = $mapper->verbose();
65 my ($self, $arg) = @_;
68 ($self->{_mapper} = $arg );
69 return $self->{_mapper};
73 sub run_coordinatemapping {
74 my ( $self, $do_upload, $species_id) = @_;
77 my $sth_stat = $self->xref->dbc->prepare(
78 "INSERT INTO process_status (status, date) "
79 .
"VALUES('coordinate_xrefs_started',now())" );
83 my $xref_db = $self->xref();
84 my $core_db = $self->core();
86 my $species = $core_db->species();
87 # my $species_id = $self->mapper->core->species;
89 if (!defined $species_id) {
return; }
92 # We only do coordinate mapping for mouse and human for now.
93 if ( !( $species eq
'mus_musculus' || $species eq
'homo_sapiens' ) ) {
94 # if ( !( $species eq 'mus_musculus') ) {
95 my $sth_stat = $self->xref->dbc->prepare(
96 "INSERT INTO process_status (status, date) "
97 .
"VALUES ('coordinate_xref_finished',now())" );
102 print
"species = $species ($species_id))\n";
104 my $output_dir = $core_db->dir();
106 my $xref_filename = catfile( $output_dir,
'xref_coord.txt' );
107 my $object_xref_filename =
108 catfile( $output_dir,
'object_xref_coord.txt' );
109 my $unmapped_reason_filename =
110 catfile( $output_dir,
'unmapped_reason_coord.txt' );
111 my $unmapped_object_filename =
112 catfile( $output_dir,
'unmapped_object_coord.txt' );
114 my $xref_dbh = $xref_db->dbc()->db_handle();
115 my $core_dbh = $core_db->dbc()->db_handle();
117 ######################################################################
118 # Figure out the last used 'xref_id', 'object_xref_id', #
119 # 'unmapped_object_id', and 'unmapped_reason_id' from the Core #
121 ######################################################################
124 $core_dbh->selectall_arrayref(
'SELECT MAX(xref_id) FROM xref')
126 my $object_xref_id = $core_dbh->selectall_arrayref(
127 'SELECT MAX(object_xref_id) FROM object_xref')->[0][0];
128 my $unmapped_object_id = $core_dbh->selectall_arrayref(
129 'SELECT MAX(unmapped_object_id) FROM unmapped_object')->[0][0];
130 my $unmapped_reason_id = $core_dbh->selectall_arrayref(
131 'SELECT MAX(unmapped_reason_id) FROM unmapped_reason')->[0][0];
133 log_progress(
"Last used xref_id is %d\n", $xref_id );
134 log_progress(
"Last used object_xref_id is %d\n",
136 log_progress(
"Last used unmapped_object_id is %d\n",
137 $unmapped_object_id );
138 log_progress(
"Last used unmapped_reason_id is %d\n",
139 $unmapped_reason_id );
141 ######################################################################
142 # Get an 'analysis_id', or discover that we need to add our analysis #
143 # to the 'analyis' table later. #
144 ######################################################################
146 my $analysis_params =
147 sprintf(
"weights(coding,ensembl)="
149 .
"transcript_score_threshold=" .
"%.2f",
150 $coding_weight, $ens_weight, $transcript_score_threshold );
152 my $analysis_sql = qq(
155 WHERE logic_name =
'xrefcoordinatemapping'
159 my $analysis_sth = $core_dbh->prepare($analysis_sql);
160 $analysis_sth->bind_param( 1, $analysis_params, SQL_VARCHAR );
162 $analysis_sth->execute();
164 my $analysis_id = $analysis_sth->fetchall_arrayref()->[0][0];
165 if ( !defined($analysis_id) ) {
167 $core_dbh->selectall_arrayref(
"SELECT analysis_id FROM analysis "
168 .
"WHERE logic_name = 'xrefcoordinatemapping'" )->[0][0];
170 if ( defined($analysis_id) && $do_upload ) {
171 log_progress(
"Will update 'analysis' table "
172 .
"with new parameter settings\n" );
174 #-----------------------------------------------------------------
175 # Update an existing analysis.
176 #-----------------------------------------------------------------
180 SET created = now(), parameters = ?
181 WHERE analysis_id = ?
184 $core_dbh->do( $sql, undef, $analysis_params, $analysis_id );
187 log_progress(
"Can not find analysis ID for this analysis:\n");
188 log_progress(
" logic_name = 'xrefcoordinatemapping'\n");
189 log_progress(
" parameters = '%s'\n", $analysis_params );
193 #---------------------------------------------------------------
194 # Store a new analysis.
195 #---------------------------------------------------------------
197 log_progress(
"A new analysis will be added\n");
199 $analysis_id = $core_dbh->selectall_arrayref(
200 'SELECT MAX(analysis_id) FROM analysis')->[0][0];
201 log_progress(
"Last used analysis_id is %d\n", $analysis_id );
204 'INSERT INTO analysis '
205 .
'VALUES(?, now(), ?, \N, \N, \N, ?, '
206 .
'\N, \N, ?, ?, \N, \N, \N)';
207 my $sth = $core_dbh->prepare($sql);
209 $sth->bind_param( 1, ++$analysis_id, SQL_INTEGER );
210 $sth->bind_param( 2,
'xrefcoordinatemapping', SQL_VARCHAR );
211 $sth->bind_param( 3,
'CoordinateMapper.pm', SQL_VARCHAR );
212 $sth->bind_param( 4, $analysis_params, SQL_VARCHAR );
213 $sth->bind_param( 5,
'CoordinateMapper.pm', SQL_VARCHAR );
217 } ## end
else [
if ( defined($analysis_id...
218 } ## end
if ( !defined($analysis_id...
220 if ( defined($analysis_id) ) {
221 log_progress(
"Analysis ID is %d\n",
225 ######################################################################
226 # Read and store available Xrefs from the Xref database. #
227 ######################################################################
233 SELECT coord_xref_id, source_id,
accession
238 my $xref_sth = $xref_dbh->prepare($xref_sql);
239 $xref_sth->bind_param( 1, $species_id, SQL_INTEGER );
241 $xref_sth->execute();
245 while ( my $xref = $xref_sth->fetchrow_hashref() ) {
247 $XrefMapper::BasicMapper::source_to_external_db{ $xref->{
249 $external_db_id ||= 11000; # FIXME (11000 is
'UCSC')
251 $unmapped{ $xref->{
'coord_xref_id'} } = {
252 'external_db_id' => $external_db_id,
253 'accession' => $xref->{
'accession'},
254 'reason' =>
'No overlap',
256 'No coordinate overlap with any Ensembl transcript'
261 if(!defined($external_db_id)){
262 die
"External_db_id is undefined species_id = $species_id\n";
265 ######################################################################
266 # Do coordinate matching. #
267 ######################################################################
269 my $core_db_adaptor =
271 -host => $core_db->dbc()->host(),
272 -port => $core_db->dbc()->port(),
273 -user => $core_db->dbc()->username(),
274 -pass => $core_db->dbc()->password(),
275 -dbname => $core_db->dbc()->dbname(),
278 my $slice_adaptor = $core_db_adaptor->get_SliceAdaptor();
279 my @chromosomes = @{ $slice_adaptor->fetch_all(
'Chromosome') };
288 AND chromosome = ? AND strand = ?
289 AND ((txStart BETWEEN ? AND ?) -- txStart in region
290 OR (txEnd BETWEEN ? AND ?) -- txEnd in region
291 OR (txStart <= ? AND txEnd >= ?)) -- region is fully contained
295 foreach my $chromosome (@chromosomes) {
296 my $chr_name = $chromosome->seq_region_name();
298 log_progress(
"Processing chromsome '%s'\n", $chr_name );
300 my @genes = @{ $chromosome->get_all_Genes( undef, undef, 1 ) };
302 log_progress(
"There are %4d genes on chromosome '%s'\n",
303 scalar(@genes), $chr_name );
305 while ( my $gene = shift(@genes) ) {
306 my @transcripts = @{ $gene->get_all_Transcripts() };
310 foreach my $transcript ( sort { $a->start() <=> $b->start() }
314 ################################################################
315 # For each Ensembl transcript: #
316 # 1. Register all Ensembl exons in a RangeRegistry. #
318 # 2. Find all transcripts in the external database that are #
319 # within the range of this Ensembl transcript. #
321 # For each of those external transcripts: #
322 # 3. Calculate the overlap of the exons of the external #
323 # transcript with the Ensembl exons using the #
324 # overlap_size() method in the RangeRegistry. #
326 # 4. Register the external exons in their own RangeRegistry. #
328 # 5. Calculate the overlap of the Ensembl exons with the #
329 # external exons as in step 3. #
331 # 6. Calculate the match score. #
333 # 7. Decide whether or not to keep the match. #
334 ################################################################
336 my @exons = @{ $transcript->get_all_Exons() };
338 my %transcript_result;
340 # '$rr1' is the RangeRegistry holding Ensembl exons for one
341 # transcript at a time.
344 my $coding_transcript;
345 if ( defined( $transcript->translation() ) ) {
346 $coding_transcript = 1;
348 $coding_transcript = 0;
351 foreach my $exon (@exons) {
353 #-------------------------------------------------------------
354 # Register each exon in the RangeRegistry. Register both the
355 # total length of the exon and the coding range of the exon.
356 #-------------------------------------------------------------
361 if ( $coding_transcript
362 && defined( $exon->coding_region_start($transcript) )
363 && defined( $exon->coding_region_end($transcript) ) )
365 $rr1->check_and_register(
367 $exon->coding_region_start($transcript),
368 $exon->coding_region_end($transcript) );
372 #---------------------------------------------------------------
373 # Get hold of all transcripts from the external database that
374 # overlaps with this Ensembl transcript.
375 #---------------------------------------------------------------
377 my $sth = $xref_dbh->prepare_cached($sql);
378 $sth->bind_param( 1, $species_id, SQL_INTEGER );
379 $sth->bind_param( 2, $chr_name, SQL_VARCHAR );
380 $sth->bind_param( 3, $gene->strand(), SQL_INTEGER );
381 $sth->bind_param( 4, $transcript->start(), SQL_INTEGER );
382 $sth->bind_param( 5, $transcript->end(), SQL_INTEGER );
383 $sth->bind_param( 6, $transcript->start(), SQL_INTEGER );
384 $sth->bind_param( 7, $transcript->end(), SQL_INTEGER );
385 $sth->bind_param( 8, $transcript->start(), SQL_INTEGER );
386 $sth->bind_param( 9, $transcript->end(), SQL_INTEGER );
390 my ( $coord_xref_id, $accession, $txStart, $txEnd, $cdsStart,
391 $cdsEnd, $exonStarts, $exonEnds );
394 \( $coord_xref_id, $accession, $txStart, $txEnd,
395 $cdsStart, $cdsEnd, $exonStarts, $exonEnds
398 while ( $sth->fetch() ) {
399 my @exonStarts = split( /,\s*/, $exonStarts );
400 my @exonEnds = split( /,\s*/, $exonEnds );
401 my $exonCount = scalar(@exonStarts);
403 # '$rr2' is the RangeRegistry holding exons from the external
404 # transcript, for one transcript at a time.
408 my $coding_match = 0;
410 my $coding_count = 0;
412 for ( my $i = 0 ; $i < $exonCount ; ++$i ) {
414 #-----------------------------------------------------------
415 # Register the exons from the external database in the same
416 # was as with the Ensembl exons, and calculate the overlap
417 # of the external exons with the previously registered
419 #-----------------------------------------------------------
426 $overlap/( $exonEnds[$i] - $exonStarts[$i] + 1 );
428 $rr2->check_and_register(
'exon', $exonStarts[$i],
431 if ( !defined($cdsStart) || !defined($cdsEnd) ) {
432 # Non-coding transcript.
434 my $codingStart = ( $exonStarts[$i] > $cdsStart
438 ( $exonEnds[$i] < $cdsEnd ? $exonEnds[$i] : $cdsEnd );
440 if ( $codingStart < $codingEnd ) {
442 $rr1->overlap_size(
'coding', $codingStart,
446 $coding_overlap/( $codingEnd - $codingStart + 1 );
448 $rr2->check_and_register(
'coding', $codingStart,
454 } ## end
for ( my $i = 0 ; $i < ...
457 my $rcoding_match = 0;
459 my $rcoding_count = 0;
461 foreach my $exon (@exons) {
463 #-----------------------------------------------------------
464 # Calculate the overlap of the Ensembl exons with the
466 #-----------------------------------------------------------
469 $rr2->overlap_size(
'exon', $exon->start(),
473 $overlap/( $exon->end() - $exon->start() + 1 );
475 if ( $coding_transcript
476 && defined( $exon->coding_region_start($transcript) )
477 && defined( $exon->coding_region_end($transcript) ) )
480 $rr2->overlap_size(
'coding',
481 $exon->coding_region_start(
483 $exon->coding_region_end(
489 ( $exon->coding_region_end($transcript) -
490 $exon->coding_region_start($transcript) +
495 } ## end
foreach my $exon (@exons)
497 #-------------------------------------------------------------
498 # Calculate the match score.
499 #-------------------------------------------------------------
501 my $score = ( ( $exon_match + $ens_weight*$rexon_match ) +
503 $coding_match + $ens_weight*$rcoding_match
505 )/( ( $exonCount + $ens_weight*scalar(@exons) ) +
507 $coding_count + $ens_weight*$rcoding_count
510 if ( !defined( $transcript_result{$coord_xref_id} )
511 || $transcript_result{$coord_xref_id} < $score )
513 $transcript_result{$coord_xref_id} = $score;
516 } ## end
while ( $sth->fetch() )
519 #---------------------------------------------------------------
520 # Apply transcript threshold and pick the best match(es) for
522 #---------------------------------------------------------------
525 foreach my $coord_xref_id (
526 sort( { $transcript_result{$b} <=> $transcript_result{$a} }
527 keys(%transcript_result) ) )
529 my $score = $transcript_result{$coord_xref_id};
531 if ( $score > $transcript_score_threshold ) {
532 $best_score ||= $score;
534 if ( sprintf(
"%.3f", $score ) eq
535 sprintf(
"%.3f", $best_score ) )
537 if ( exists( $unmapped{$coord_xref_id} ) ) {
538 $mapped{$coord_xref_id} = $unmapped{$coord_xref_id};
539 delete( $unmapped{$coord_xref_id} );
540 $mapped{$coord_xref_id}{
'reason'} = undef;
541 $mapped{$coord_xref_id}{
'reason_full'} = undef;
542 $mapped{$coord_xref_id}{
'chr_name'} = $chr_name;
545 push( @{ $mapped{$coord_xref_id}{
'mapped_to'} }, {
546 'ensembl_id' => $transcript->dbID(),
547 'ensembl_object_type' =>
'Transcript'
550 # This is now a candidate Xref for the gene.
551 if ( !defined( $gene_result{$coord_xref_id} )
552 || $gene_result{$coord_xref_id} < $score )
554 $gene_result{$coord_xref_id} = $score;
557 } elsif ( exists( $unmapped{$coord_xref_id} ) ) {
558 $unmapped{$coord_xref_id}{
'reason'} =
559 'Was not best match';
560 $unmapped{$coord_xref_id}{
'reason_full'} =
562 "Did not top best transcript match score (%.2f)",
564 if ( !defined( $unmapped{$coord_xref_id}{
'score'} )
565 || $score > $unmapped{$coord_xref_id}{
'score'} )
567 $unmapped{$coord_xref_id}{
'score'} = $score;
568 $unmapped{$coord_xref_id}{
'ensembl_id'} =
573 } elsif ( exists( $unmapped{$coord_xref_id} )
574 && $unmapped{$coord_xref_id}{
'reason'} ne
575 'Was not best match' )
577 $unmapped{$coord_xref_id}{
'reason'} =
578 'Did not meet threshold';
579 $unmapped{$coord_xref_id}{
'reason_full'} =
580 sprintf(
"Match score for transcript "
581 .
"lower than threshold (%.2f)",
582 $transcript_score_threshold );
583 if ( !defined( $unmapped{$coord_xref_id}{
'score'} )
584 || $score > $unmapped{$coord_xref_id}{
'score'} )
586 $unmapped{$coord_xref_id}{
'score'} = $score;
587 $unmapped{$coord_xref_id}{
'ensembl_id'} =
591 } ## end
foreach my $coord_xref_id (...
593 } ## end
foreach my $transcript ( sort...
595 #-----------------------------------------------------------------
596 # Pick the best match(es)
for this gene.
597 #-----------------------------------------------------------------
599 # Do not want them on both Transcripts and Genes. For Biomart purposes. (Ianl)
600 # So no need to find the best one
for the gene.
603 #
foreach my $coord_xref_id (
604 # sort( { $gene_result{$b} <=> $gene_result{$a} }
605 # keys(%gene_result) ) )
607 # my $score = $gene_result{$coord_xref_id};
609 # $best_score ||= $score;
612 # sprintf( "%.3f", $score ) eq sprintf( "%.3f", $best_score ) )
614 # push( @{ $mapped{$coord_xref_id}{'mapped_to'} }, {
615 # 'ensembl_id' => $gene->dbID(),
616 # 'ensembl_object_type' => 'Gene'
621 } ## end
while ( my $gene = shift(...
622 } ## end
foreach my $chromosome (@chromosomes)
624 # Make all dumps. Order is important.
625 dump_xref( $xref_filename, $xref_id, \%mapped, \%unmapped );
626 dump_object_xref( $object_xref_filename, $object_xref_id, $analysis_id, \%mapped );
627 dump_unmapped_reason( $unmapped_reason_filename, $unmapped_reason_id,
628 \%unmapped, $core_dbh );
629 dump_unmapped_object( $unmapped_object_filename, $unmapped_object_id,
630 $analysis_id, \%unmapped );
633 # Order is important!
634 upload_data(
'unmapped_reason', $unmapped_reason_filename,
635 $external_db_id, $core_dbh );
636 upload_data(
'unmapped_object', $unmapped_object_filename,
637 $external_db_id, $core_dbh );
638 upload_data(
'object_xref', $object_xref_filename, $external_db_id,
640 upload_data(
'xref', $xref_filename, $external_db_id, $core_dbh );
643 $sth_stat = $self->xref->dbc->prepare(
644 "INSERT INTO process_status (status, date) "
645 .
"VALUES ('coordinate_xref_finished',now())" );
646 $sth_stat->execute();
649 $self->biomart_fix(
"UCSC",
"Translation",
"Gene");
650 $self->biomart_fix(
"UCSC",
"Transcript",
"Gene");
652 } ## end sub run_coordinatemapping
654 #-----------------------------------------------------------------------
657 my ( $filename, $xref_id, $mapped, $unmapped ) = @_;
659 ######################################################################
661 ######################################################################
663 my $fh = IO::File->new(
'>' . $filename )
664 or croak( sprintf(
"Can not open '%s' for writing", $filename ) );
666 log_progress(
"Dumping for 'xref' to '%s'\n", $filename );
668 foreach my $xref ( values( %{$unmapped} ), values( %{$mapped} ) ) {
669 # Assign 'xref_id' to this Xref.
670 $xref->{
'xref_id'} = ++$xref_id;
672 my $accession = $xref->{
'accession'};
674 my ($version) = ( $accession =~ /\.(\d+)$/ );
677 my $info_text = (defined($xref->{
'chr_name'}) && $xref->{
'chr_name'} eq
'Y' ?
"Y Chromosome" :
"");
679 $fh->printf(
"%d\t%d\t%s\t%s\t%d\t%s\t%s\t%s\n",
681 $xref->{
'external_db_id'},
686 'COORDINATE_OVERLAP',
692 log_progress(
"Dumping for 'xref' done\n");
694 } ## end sub dump_xref
696 #-----------------------------------------------------------------------
698 sub dump_object_xref {
699 my ( $filename, $object_xref_id, $analysis_id, $mapped ) = @_;
701 ######################################################################
702 # Dump for 'object_xref'. #
703 ######################################################################
705 my $fh = IO::File->new(
'>' . $filename )
706 or croak( sprintf(
"Can not open '%s' for writing", $filename ) );
708 log_progress(
"Dumping for 'object_xref' to '%s'\n", $filename );
710 foreach my $xref ( values( %{$mapped} ) ) {
711 foreach my $object_xref ( @{ $xref->{
'mapped_to'} } ) {
712 # Assign 'object_xref_id' to this Object Xref.
713 $object_xref->{
'object_xref_id'} = ++$object_xref_id;
715 $fh->printf(
"%d\t%d\t%s\t%d\t%s\t%s\n",
716 $object_xref->{
'object_xref_id'},
717 $object_xref->{
'ensembl_id'},
718 $object_xref->{
'ensembl_object_type'},
726 log_progress(
"Dumping for 'object_xref' done\n");
728 } ## end sub dump_objexref
730 #-----------------------------------------------------------------------
732 sub dump_unmapped_reason {
733 my ( $filename, $unmapped_reason_id, $unmapped, $core_dbh ) = @_;
735 ######################################################################
736 # Dump for 'unmapped_reason'. #
737 ######################################################################
739 # Create a list of the unique reasons.
742 foreach my $xref ( values( %{$unmapped} ) ) {
743 if ( !exists( $reasons{ $xref->{
'reason_full'} } ) ) {
744 $reasons{ $xref->{
'reason_full'} } = {
745 'summary' => $xref->{
'reason'},
746 'full' => $xref->{
'reason_full'}
751 my $fh = IO::File->new(
'>' . $filename )
752 or croak( sprintf(
"Can not open '%s' for writing", $filename ) );
754 log_progress(
"Dumping for 'unmapped_reason' to '%s'\n", $filename );
757 $core_dbh->prepare(
'SELECT unmapped_reason_id '
758 .
'FROM unmapped_reason '
759 .
'WHERE full_description = ?' );
762 sort( { $a->{
'full'} cmp $b->{
'full'} } values(%reasons) ) )
764 # Figure out 'unmapped_reason_id' from the core database.
765 $sth->bind_param( 1, $reason->{
'full'}, SQL_VARCHAR );
770 $sth->bind_col( 1, \$id );
773 if ( defined($id) ) {
774 $reason->{
'unmapped_reason_id'} = $id;
776 $reason->{
'unmapped_reason_id'} = ++$unmapped_reason_id;
781 $fh->printf(
"%d\t%s\t%s\n", $reason->{
'unmapped_reason_id'},
782 $reason->{
'summary'}, $reason->{
'full'} );
787 log_progress(
"Dumping for 'unmapped_reason' done\n");
789 # Assign reasons to the unmapped Xrefs from %reasons.
790 foreach my $xref ( values( %{$unmapped} ) ) {
791 $xref->{
'reason'} = $reasons{ $xref->{
'reason_full'} };
792 $xref->{
'reason_full'} = undef;
795 } ## end sub dump_unmapped_reason
797 #-----------------------------------------------------------------------
799 sub dump_unmapped_object {
800 my ( $filename, $unmapped_object_id, $analysis_id, $unmapped ) = @_;
802 ######################################################################
803 # Dump for 'unmapped_object'. #
804 ######################################################################
806 my $fh = IO::File->new(
'>' . $filename )
807 or croak( sprintf(
"Can not open '%s' for writing", $filename ) );
809 log_progress(
"Dumping for 'unmapped_object' to '%s'\n", $filename );
811 foreach my $xref ( values( %{$unmapped} ) ) {
812 # Assign 'unmapped_object_id' to this Xref.
813 $xref->{
'unmapped_object_id'} = ++$unmapped_object_id;
816 "%d\t%s\t%s\t%d\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n",
817 $xref->{
'unmapped_object_id'},
819 $analysis_id ||
'\N', #
'\N' (NULL) means no analysis exists
820 # and uploading
this table will fail.
821 $xref->{
'external_db_id'},
822 $xref->{
'accession'},
823 $xref->{
'reason'}->{
'unmapped_reason_id'}, (
824 defined( $xref->{
'score'} )
825 ? sprintf(
"%.3f", $xref->{
'score'} )
829 $xref->{
'ensembl_id'} ||
'\N',
830 ( defined( $xref->{
'ensembl_id'} ) ?
'Transcript' :
'\N' ),
835 log_progress(
"Dumping for 'unmapped_object' done\n");
837 } ## end sub dump_unmapped_object
839 #-----------------------------------------------------------------------
842 my ( $table_name, $filename, $external_db_id, $dbh ) = @_;
844 ######################################################################
845 # Upload data from a file to a table. #
846 ######################################################################
848 if ( !-r $filename ) {
849 croak( sprintf(
"Can not open '%s' for reading", $filename ) );
852 my $cleanup_sql =
'';
853 if ( $table_name eq
'unmapped_reason' ) {
856 FROM unmapped_object uo,
858 WHERE uo.external_db_id = ?
859 AND ur.unmapped_reason_id = uo.unmapped_reason_id);
860 } elsif ( $table_name eq
'unmapped_object' ) {
863 FROM unmapped_object uo
864 WHERE uo.external_db_id = ?);
865 } elsif ( $table_name eq
'object_xref' ) {
870 WHERE x.external_db_id = ?
871 AND ox.xref_id = x.xref_id);
872 } elsif ( $table_name eq
'xref' ) {
876 WHERE x.external_db_id = ?);
878 croak( sprintf(
"Table '%s' is unknown\n", $table_name ) );
882 sprintf(
"LOAD DATA LOCAL INFILE ? REPLACE INTO TABLE %s",
886 "Removing old data (external_db_id = '%d') from table '%s'\n",
887 $external_db_id, $table_name );
889 my $rows = $dbh->do( $cleanup_sql, undef, $external_db_id )
890 or croak( $dbh->strerr() );
892 log_progress(
"Removed %d rows\n", $rows );
894 log_progress(
"Uploading for '%s' from '%s'\n",
895 $table_name, $filename );
897 $rows = $dbh->do( $load_sql, undef, $filename )
898 or croak( $dbh->errstr() );
900 $dbh->do(
"OPTIMIZE TABLE $table_name") or croak( $dbh->errstr() );
902 log_progress(
"Uploading for '%s' done (%d rows)\n",
903 $table_name, $rows );
905 } ## end sub upload_data
907 #-----------------------------------------------------------------------
910 my ( $fmt, @params ) = @_;
912 return if (!$verbose);
913 printf( STDERR
"COORD==> %s", sprintf( $fmt, @params ) );