ensembl-hive  2.7.0
VBCoordinateMapper.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 # $Id$
21 
22 # This is a set of subroutines used for creating Xrefs based on
23 # coordinate overlaps.
24 
25 package XrefMapper::VBCoordinateMapper;
26 
use strict;
use warnings;

use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Mapper::RangeRegistry;

use Carp;
use IO::File;
use File::Spec::Functions;
36 
# Inheritance.  'our' replaces the obsolete 'use vars' pragma; the effect on
# the package variable @ISA is the same.
# NOTE(review): the parent module is not loaded in this file; presumably the
# caller has already loaded it -- confirm.
our @ISA = qw( XrefMapper::CoordinateMapper );

# NOTE(review): Exporter is not loaded in this file, so this list only has
# an effect if an ancestor class supplies import() -- confirm.
our @EXPORT = qw( run_coordinatemapping );

# Weights used by the match score in run_coordinatemapping():
#   $coding_weight  multiplies the coding-region terms of the score,
#   $ens_weight     multiplies the terms measuring how much of the Ensembl
#                   exons is covered by the external exons.
our $coding_weight = 2;
our $ens_weight    = 3;

# Written into the 'parameters' column of the analysis row by
# run_coordinatemapping().
our $transcript_score_threshold = 0.75;
48 
#-----------------------------------------------------------------------
# run_coordinatemapping( $mapper, $do_upload )
#
# Creates Xrefs for Ensembl transcripts from the rows of the
# 'coordinate_xref' table (Xref database) whose transcript coordinates
# overlap an Ensembl transcript on the same chromosome and strand.
#
#   $mapper     object providing xref() and core(), the handles on the
#               Xref and Core databases.
#   $do_upload  when true, the 'analysis' table is updated/extended and the
#               four dump files are loaded into the Core database; when
#               false only the dump files are written.
#
# Side effects: writes 'xref_coord.txt', 'object_xref_coord.txt',
# 'unmapped_reason_coord.txt' and 'unmapped_object_coord.txt' into the
# directory returned by $core_db->dir(), and logs progress on STDERR.
#
# The return value is not meaningful.
#-----------------------------------------------------------------------
sub run_coordinatemapping {
  print STDERR "RUNNING VB COORD MAPPING\n";
  my ( $mapper, $do_upload ) = @_;

  my $xref_db = $mapper->xref();
  my $core_db = $mapper->core();

  my $species = $core_db->species();

  # NOTE(review): the first line of this call was lost from the listing
  # this file was recovered from; the function name comes from the file's
  # cross-reference list and '$xref_db' as first argument is a
  # reconstruction -- confirm against XrefMapper::BasicMapper.
  my $species_id =
    XrefMapper::BasicMapper::get_species_id_from_species_name( $xref_db,
                                                               $species );

  # No restriction on the species is applied here: the former mouse/human
  # test had its 'return' disabled and therefore did nothing.

  my $output_dir = $core_db->dir();

  my $xref_filename = catfile( $output_dir, 'xref_coord.txt' );
  my $object_xref_filename =
    catfile( $output_dir, 'object_xref_coord.txt' );
  my $unmapped_reason_filename =
    catfile( $output_dir, 'unmapped_reason_coord.txt' );
  my $unmapped_object_filename =
    catfile( $output_dir, 'unmapped_object_coord.txt' );

  my $xref_dbh = $xref_db->dbc()->db_handle();
  my $core_dbh = $core_db->dbc()->db_handle();

  ######################################################################
  # Figure out the last used 'xref_id', 'object_xref_id',              #
  # 'unmapped_object_id', and 'unmapped_reason_id' from the Core       #
  # database.                                                          #
  ######################################################################

  # MAX() yields NULL (undef) for an empty table; fall back to 0 so that
  # the '%d' formats below and the later '++' start from a defined value.
  my $xref_id =
    $core_dbh->selectall_arrayref('SELECT MAX(xref_id) FROM xref')
    ->[0][0] || 0;
  my $object_xref_id = $core_dbh->selectall_arrayref(
             'SELECT MAX(object_xref_id) FROM object_xref')->[0][0] || 0;
  my $unmapped_object_id = $core_dbh->selectall_arrayref(
     'SELECT MAX(unmapped_object_id) FROM unmapped_object')->[0][0] || 0;
  my $unmapped_reason_id = $core_dbh->selectall_arrayref(
     'SELECT MAX(unmapped_reason_id) FROM unmapped_reason')->[0][0] || 0;

  log_progress( "Last used xref_id            is %d\n", $xref_id );
  log_progress( "Last used object_xref_id     is %d\n",
                $object_xref_id );
  log_progress( "Last used unmapped_object_id is %d\n",
                $unmapped_object_id );
  log_progress( "Last used unmapped_reason_id is %d\n",
                $unmapped_reason_id );

  ######################################################################
  # Get an 'analysis_id', or discover that we need to add our analysis #
  # to the 'analysis' table later.                                     #
  ######################################################################

  my $analysis_params =
    sprintf( "weights(coding,ensembl)="
               . "%.2f,%.2f;"
               . "transcript_score_threshold=" . "%.2f",
             $coding_weight, $ens_weight, $transcript_score_threshold );

  my $analysis_sql = qq(
    SELECT  analysis_id
    FROM    analysis
    WHERE   logic_name = 'xrefcoordinatemapping'
    AND     parameters = ?
  );

  my $analysis_sth = $core_dbh->prepare($analysis_sql);
  $analysis_sth->execute($analysis_params);

  # First choice: an analysis with exactly the current parameters.
  my $analysis_id = $analysis_sth->fetchall_arrayref()->[0][0];
  if ( !defined($analysis_id) ) {
    # Second choice: any analysis with our logic name, whatever its
    # parameters.
    $analysis_id =
      $core_dbh->selectall_arrayref( "SELECT analysis_id FROM analysis "
                . "WHERE logic_name = 'xrefcoordinatemapping'" )->[0][0];

    if ( defined($analysis_id) && $do_upload ) {
      log_progress( "Will update 'analysis' table "
                    . "with new parameter settings\n" );

      #-----------------------------------------------------------------
      # Update an existing analysis.
      #-----------------------------------------------------------------

      my $sql = qq(
        UPDATE  analysis
        SET     created = now(), parameters = ?
        WHERE   analysis_id = ?
      );

      $core_dbh->do( $sql, undef, $analysis_params, $analysis_id );

    } else {
      # Reached either when no analysis row exists at all, or when one
      # exists with other parameters but uploading is switched off.
      log_progress("Can not find analysis ID for this analysis:\n");
      log_progress("  logic_name = 'xrefcoordinatemapping'\n");
      log_progress( "  parameters = '%s'\n", $analysis_params );

      if ($do_upload) {
        #---------------------------------------------------------------
        # Store a new analysis.
        #---------------------------------------------------------------

        log_progress("A new analysis will be added\n");

        $analysis_id = $core_dbh->selectall_arrayref(
                     'SELECT MAX(analysis_id) FROM analysis')->[0][0] || 0;
        log_progress( "Last used analysis_id is %d\n", $analysis_id );

        # The column list is positional, so this statement depends on the
        # column order of the 'analysis' table.
        my $sql = 'INSERT INTO analysis '
          . 'VALUES(?, now(), ?, \N, \N, \N, ?, \N, \N, ?, ?, \N, \N, \N)';
        my $sth = $core_dbh->prepare($sql);

        $sth->execute( ++$analysis_id, 'xrefcoordinatemapping',
                       'CoordinateMapper.pm', $analysis_params,
                       'CoordinateMapper.pm' );
      }
    } ## end else [ if ( defined($analysis_id...
  } ## end if ( !defined($analysis_id...

  if ( defined($analysis_id) ) {
    log_progress( "Analysis ID                  is %d\n",
                  $analysis_id );
  }

  ######################################################################
  # Read and store available Xrefs from the Xref database.             #
  ######################################################################

  # Both hashes are keyed on 'coord_xref_id'.  Every Xref starts out in
  # %unmapped and is moved to %mapped when a transcript match is found.
  my %unmapped;
  my %mapped;

  my $xref_sql = qq(
    SELECT  coord_xref_id, source_id, accession
    FROM    coordinate_xref
    WHERE   species_id = ?
  );

  my $xref_sth = $xref_dbh->prepare($xref_sql);
  $xref_sth->execute($species_id);

  while ( my $xref = $xref_sth->fetchrow_hashref() ) {
    $unmapped{ $xref->{'coord_xref_id'} } = {
      'external_db_id' =>
        $XrefMapper::BasicMapper::source_to_external_db{ $xref->{
            'source_id'} }
        || 11000,    # FIXME (11000 is 'UCSC')
      'accession' => $xref->{'accession'},
      'reason'    => 'No overlap',
      'reason_full' =>
        'No coordinate overlap with any Ensembl transcript' };
  }
  $xref_sth->finish();

  ######################################################################
  # Do coordinate matching.                                            #
  ######################################################################

  my $core_db_adaptor =
    Bio::EnsEMBL::DBSQL::DBAdaptor->new(
                                 -host   => $core_db->dbc()->host(),
                                 -port   => $core_db->dbc()->port(),
                                 -user   => $core_db->dbc()->username(),
                                 -pass   => $core_db->dbc()->password(),
                                 -dbname => $core_db->dbc()->dbname(),
    );

  my $slice_adaptor = $core_db_adaptor->get_SliceAdaptor();
  my @chromosomes   = @{ $slice_adaptor->fetch_all('Chromosome') };

  # The nine placeholders are: species_id, chromosome, strand, followed by
  # the (start, end) pair of the Ensembl transcript three times over.
  my $sql = qq(
    SELECT    coord_xref_id, accession,
              txStart, txEnd,
              cdsStart, cdsEnd,
              exonStarts, exonEnds
    FROM      coordinate_xref
    WHERE     species_id = ?
    AND       chromosome = ? AND strand   = ?
    AND       ((txStart >= ? AND txStart <= ?)    -- txStart in region
    OR         (txEnd   >= ? AND txEnd   <= ?)    -- txEnd in region
    OR         (txStart <= ? AND txEnd   >= ?))   -- region is contained
    ORDER BY  accession
  );

  foreach my $chromosome (@chromosomes) {
    my $chr_name = $chromosome->seq_region_name();

    log_progress( "Processing chromsome '%s'\n", $chr_name );

    my @genes = @{ $chromosome->get_all_Genes( undef, undef, 1 ) };

    log_progress( "There are %4d genes on chromosome '%s'\n",
                  scalar(@genes), $chr_name );

    while ( my $gene = shift(@genes) ) {
      my @transcripts = @{ $gene->get_all_Transcripts() };

      # Never filled in: the code that recorded per-gene candidates is
      # disabled (see the note before the transcript match loop below), so
      # the gene loop at the end of this block currently has no effect.
      my %gene_result;

      foreach my $transcript ( sort { $a->start() <=> $b->start() }
                               @transcripts )
      {
        ################################################################
        # For each Ensembl transcript:                                 #
        #   1. Register all Ensembl exons in a RangeRegistry.          #
        #                                                              #
        #   2. Find all transcripts in the external database that are  #
        #      within the range of this Ensembl transcript.            #
        #                                                              #
        # For each of those external transcripts:                      #
        #   3. Calculate the overlap of the exons of the external      #
        #      transcript with the Ensembl exons using the             #
        #      overlap_size() method in the RangeRegistry.             #
        #                                                              #
        #   4. Register the external exons in their own RangeRegistry. #
        #                                                              #
        #   5. Calculate the overlap of the Ensembl exons with the     #
        #      external exons as in step 3.                            #
        #                                                              #
        #   6. Calculate the match score.                              #
        #                                                              #
        #   7. Decide whether or not to keep the match.                #
        ################################################################

        my @exons = @{ $transcript->get_all_Exons() };

        # Best score seen per 'coord_xref_id' for this transcript.
        my %transcript_result;

        # '$rr1' is the RangeRegistry holding Ensembl exons for one
        # transcript at a time.
        my $rr1 = Bio::EnsEMBL::Mapper::RangeRegistry->new();

        my $coding_transcript;
        if ( defined( $transcript->translation() ) ) {
          $coding_transcript = 1;
        } else {
          $coding_transcript = 0;
        }

        foreach my $exon (@exons) {
          #-------------------------------------------------------------
          # Register each exon in the RangeRegistry.  Register both the
          # total length of the exon and the coding range of the exon.
          #-------------------------------------------------------------

          $rr1->check_and_register( 'exon', $exon->start(),
                                    $exon->end() );

          if (    $coding_transcript
               && defined( $exon->coding_region_start($transcript) )
               && defined( $exon->coding_region_end($transcript) ) )
          {
            $rr1->check_and_register(
                               'coding',
                               $exon->coding_region_start($transcript),
                               $exon->coding_region_end($transcript) );
          }
        }

        #---------------------------------------------------------------
        # Get hold of all transcripts from the external database that
        # overlaps with this Ensembl transcript.
        #---------------------------------------------------------------

        my $sth = $xref_dbh->prepare_cached($sql);
        $sth->execute( $species_id,        $chr_name,
                       $gene->strand(),    $transcript->start(),
                       $transcript->end(), $transcript->start(),
                       $transcript->end(), $transcript->start(),
                       $transcript->end() );

        my ( $coord_xref_id, $accession, $txStart, $txEnd, $cdsStart,
             $cdsEnd, $exonStarts, $exonEnds );

        $sth->bind_columns(
                        \( $coord_xref_id, $accession, $txStart, $txEnd,
                           $cdsStart, $cdsEnd, $exonStarts, $exonEnds
                        ) );

        while ( $sth->fetch() ) {
          # The exon coordinates are stored as comma-separated lists.
          my @exonStarts = split( /,\s*/, $exonStarts );
          my @exonEnds   = split( /,\s*/, $exonEnds );
          my $exonCount  = scalar(@exonStarts);

          # '$rr2' is the RangeRegistry holding exons from the external
          # transcript, for one transcript at a time.
          my $rr2 = Bio::EnsEMBL::Mapper::RangeRegistry->new();

          my $exon_match   = 0;
          my $coding_match = 0;

          my $coding_count = 0;

          for ( my $i = 0 ; $i < $exonCount ; ++$i ) {
            #-----------------------------------------------------------
            # Register the exons from the external database in the same
            # way as with the Ensembl exons, and calculate the overlap
            # of the external exons with the previously registered
            # Ensembl exons.
            #-----------------------------------------------------------

            my $overlap =
              $rr1->overlap_size( 'exon', $exonStarts[$i],
                                  $exonEnds[$i] );

            # Fraction of this external exon covered by Ensembl exons.
            $exon_match +=
              $overlap/( $exonEnds[$i] - $exonStarts[$i] + 1 );

            $rr2->check_and_register( 'exon', $exonStarts[$i],
                                      $exonEnds[$i] );

            if ( !defined($cdsStart) || !defined($cdsEnd) ) {
              # Non-coding transcript.
            } else {
              # Clip the exon to the coding region of the external
              # transcript.
              my $codingStart = ( $exonStarts[$i] > $cdsStart
                                  ? $exonStarts[$i]
                                  : $cdsStart );
              my $codingEnd =
                ( $exonEnds[$i] < $cdsEnd ? $exonEnds[$i] : $cdsEnd );

              if ( $codingStart < $codingEnd ) {
                my $coding_overlap =
                  $rr1->overlap_size( 'coding', $codingStart,
                                      $codingEnd );

                $coding_match +=
                  $coding_overlap/( $codingEnd - $codingStart + 1 );

                $rr2->check_and_register( 'coding', $codingStart,
                                          $codingEnd );

                ++$coding_count;
              }
            }
          } ## end for ( my $i = 0 ; $i < ...

          my $rexon_match   = 0;
          my $rcoding_match = 0;

          my $rcoding_count = 0;

          foreach my $exon (@exons) {
            #-----------------------------------------------------------
            # Calculate the overlap of the Ensembl exons with the
            # external exons.
            #-----------------------------------------------------------

            my $overlap =
              $rr2->overlap_size( 'exon', $exon->start(),
                                  $exon->end() );

            $rexon_match +=
              $overlap/( $exon->end() - $exon->start() + 1 );

            if (    $coding_transcript
                 && defined( $exon->coding_region_start($transcript) )
                 && defined( $exon->coding_region_end($transcript) ) )
            {
              my $coding_overlap =
                $rr2->overlap_size( 'coding',
                                    $exon->coding_region_start(
                                                           $transcript),
                                    $exon->coding_region_end(
                                                            $transcript)
                );

              $rcoding_match +=
                $coding_overlap/
                ( $exon->coding_region_end($transcript) -
                  $exon->coding_region_start($transcript) +
                  1 );

              ++$rcoding_count;
            }
          } ## end foreach my $exon (@exons)

          #-------------------------------------------------------------
          # Calculate the match score.
          #
          # Weighted sum of the four overlap fractions divided by the
          # same weighted sum of the corresponding exon counts.
          #-------------------------------------------------------------

          my $score = ( ( $exon_match + $ens_weight*$rexon_match ) +
                        $coding_weight*(
                                $coding_match + $ens_weight*$rcoding_match
                        )
            )/( ( $exonCount + $ens_weight*scalar(@exons) ) +
                $coding_weight*(
                                $coding_count + $ens_weight*$rcoding_count
                ) );

          if ( !defined( $transcript_result{$coord_xref_id} )
               || $transcript_result{$coord_xref_id} < $score )
          {
            $transcript_result{$coord_xref_id} = $score;
          }

        } ## end while ( $sth->fetch() )
        $sth->finish();

        #---------------------------------------------------------------
        # Attach every overlapping external transcript to this Ensembl
        # transcript, highest score first.
        #
        # Score-based filtering is disabled in this mapper.  The disabled
        # logic kept only matches scoring above
        # $transcript_score_threshold and equal (to three decimals) to
        # the best score, recorded the reasons 'Was not best match' and
        # 'Did not meet threshold' (with score and transcript dbID) on the
        # remaining Xrefs, and stored the kept scores in %gene_result.
        #---------------------------------------------------------------

        foreach my $coord_xref_id (
               sort( { $transcript_result{$b} <=> $transcript_result{$a} }
                     keys(%transcript_result) ) )
        {
          if ( exists( $unmapped{$coord_xref_id} ) ) {
            # First match for this Xref: move it to %mapped and clear the
            # default 'No overlap' reason.
            $mapped{$coord_xref_id} = $unmapped{$coord_xref_id};
            delete( $unmapped{$coord_xref_id} );
            $mapped{$coord_xref_id}{'reason'}      = undef;
            $mapped{$coord_xref_id}{'reason_full'} = undef;
          }

          push( @{ $mapped{$coord_xref_id}{'mapped_to'} }, {
                  'ensembl_id'          => $transcript->dbID(),
                  'ensembl_object_type' => 'Transcript'
                } );
        } ## end foreach my $coord_xref_id (...

      } ## end foreach my $transcript ( sort...

      #-----------------------------------------------------------------
      # Attach the candidate Xrefs to the gene.  %gene_result is empty at
      # this point (see above), so nothing is attached.
      #-----------------------------------------------------------------

      foreach my $coord_xref_id (
                           sort( { $gene_result{$b} <=> $gene_result{$a} }
                                 keys(%gene_result) ) )
      {
        push( @{ $mapped{$coord_xref_id}{'mapped_to'} }, {
                'ensembl_id'          => $gene->dbID(),
                'ensembl_object_type' => 'Gene'
              } );
      }

    } ## end while ( my $gene = shift(...
  } ## end foreach my $chromosome (@chromosomes)

  # Make all dumps.  Order is important: dump_xref() assigns the 'xref_id'
  # values that dump_object_xref() writes, and dump_unmapped_reason()
  # assigns the reason IDs that dump_unmapped_object() writes.
  dump_xref( $xref_filename, $xref_id, \%mapped, \%unmapped );
  dump_object_xref( $object_xref_filename, $object_xref_id, \%mapped );
  dump_unmapped_reason( $unmapped_reason_filename, $unmapped_reason_id,
                        \%unmapped );
  dump_unmapped_object( $unmapped_object_filename, $unmapped_object_id,
                        $analysis_id, \%unmapped );

  if ($do_upload) {
    upload_data( 'xref', $xref_filename, $core_dbh );
    upload_data( 'object_xref', $object_xref_filename, $core_dbh );
    upload_data( 'unmapped_reason', $unmapped_reason_filename,
                 $core_dbh );
    upload_data( 'unmapped_object', $unmapped_object_filename,
                 $core_dbh );
  }

} ## end sub run_coordinatemapping
570 
571 #-----------------------------------------------------------------------
572 #-----------------------------------------------------------------------
573 
# dump_xref( $filename, $xref_id, $mapped, $unmapped )
#
# Writes one tab-separated line per Xref to $filename, in the form loaded
# into the 'xref' table, and stores the newly assigned ID in each Xref
# hash under the key 'xref_id'.
#
#   $filename  path of the dump file (overwritten).
#   $xref_id   last used xref_id; IDs are assigned from $xref_id + 1.
#   $mapped    hash reference of mapped Xrefs.
#   $unmapped  hash reference of unmapped Xrefs (these are written first).
#
# Croaks if the file can not be opened, or if closing it fails.
sub dump_xref {
  my ( $filename, $xref_id, $mapped, $unmapped ) = @_;

  ######################################################################
  # Dump for 'xref'.                                                   #
  ######################################################################

  # An explicit mode keeps IO::File from interpreting characters in the
  # file name as part of the open mode.
  my $fh = IO::File->new( $filename, 'w' )
    or croak(
       sprintf( "Can not open '%s' for writing: %s", $filename, $! ) );

  log_progress( "Dumping for 'xref' to '%s'\n", $filename );

  foreach my $xref ( values( %{$unmapped} ), values( %{$mapped} ) ) {
    # Assign 'xref_id' to this Xref.
    $xref->{'xref_id'} = ++$xref_id;

    my $accession = $xref->{'accession'};

    # The version is the numeric suffix after the last dot of the
    # accession, or 0 if there is none.
    my ($version) = ( $accession =~ /\.(\d+)$/ );
    $version ||= 0;

    # The accession is written twice, in the third and fourth columns.
    $fh->printf( "%d\t%d\t%s\t%s\t%d\t%s\t%s\t%s\n",
                 $xref->{'xref_id'},
                 $xref->{'external_db_id'},
                 $accession,
                 $accession,
                 $version,
                 '\N',
                 'COORDINATE_OVERLAP',
                 '\N'    # FIXME (possibly)
    );
  }

  # Buffered write errors only surface when the handle is closed.
  $fh->close()
    or croak( sprintf( "Can not close '%s': %s", $filename, $! ) );

  log_progress("Dumping for 'xref' done\n");

} ## end sub dump_xref
611 
612 #-----------------------------------------------------------------------
613 
# dump_object_xref( $filename, $object_xref_id, $mapped )
#
# Writes one tab-separated line to $filename for every entry of the
# 'mapped_to' list of every mapped Xref, in the form loaded into the
# 'object_xref' table, and stores the newly assigned ID in each entry
# under the key 'object_xref_id'.
#
#   $filename        path of the dump file (overwritten).
#   $object_xref_id  last used object_xref_id; IDs are assigned from
#                    $object_xref_id + 1.
#   $mapped          hash reference of mapped Xrefs.  Each Xref must
#                    already carry its 'xref_id' (assigned by
#                    dump_xref()).
#
# Croaks if the file can not be opened, or if closing it fails.
sub dump_object_xref {
  my ( $filename, $object_xref_id, $mapped ) = @_;

  ######################################################################
  # Dump for 'object_xref'.                                            #
  ######################################################################

  # An explicit mode keeps IO::File from interpreting characters in the
  # file name as part of the open mode.
  my $fh = IO::File->new( $filename, 'w' )
    or croak(
       sprintf( "Can not open '%s' for writing: %s", $filename, $! ) );

  log_progress( "Dumping for 'object_xref' to '%s'\n", $filename );

  foreach my $xref ( values( %{$mapped} ) ) {
    foreach my $object_xref ( @{ $xref->{'mapped_to'} } ) {
      # Assign 'object_xref_id' to this Object Xref.
      $object_xref->{'object_xref_id'} = ++$object_xref_id;

      $fh->printf( "%d\t%d\t%s\t%d\t%s\n",
                   $object_xref->{'object_xref_id'},
                   $object_xref->{'ensembl_id'},
                   $object_xref->{'ensembl_object_type'},
                   $xref->{'xref_id'},
                   '\N' );
    }
  }

  # Buffered write errors only surface when the handle is closed.
  $fh->close()
    or croak( sprintf( "Can not close '%s': %s", $filename, $! ) );

  log_progress("Dumping for 'object_xref' done\n");

} ## end sub dump_object_xref
644 
645 #-----------------------------------------------------------------------
646 
# dump_unmapped_reason( $filename, $unmapped_reason_id, $unmapped )
#
# Collects the distinct reasons carried by the unmapped Xrefs, writes them
# to $filename as tab-separated lines in the form loaded into the
# 'unmapped_reason' table, and then replaces the 'reason' string of every
# unmapped Xref with a reference to its reason record (which holds
# 'unmapped_reason_id', 'summary' and 'full').  The 'reason_full' entry of
# each Xref is set to undef afterwards.
#
#   $filename            path of the dump file (overwritten).
#   $unmapped_reason_id  last used unmapped_reason_id; IDs are assigned
#                        from $unmapped_reason_id + 1.
#   $unmapped            hash reference of unmapped Xrefs.
#
# Croaks if the file can not be opened, or if closing it fails.
sub dump_unmapped_reason {
  my ( $filename, $unmapped_reason_id, $unmapped ) = @_;

  ######################################################################
  # Dump for 'unmapped_reason'.                                        #
  ######################################################################

  # Create a list of the unique reasons, keyed on the full description.
  my %reasons;

  foreach my $xref ( values( %{$unmapped} ) ) {
    if ( !exists( $reasons{ $xref->{'reason_full'} } ) ) {
      $reasons{ $xref->{'reason_full'} } = {
                                      'summary' => $xref->{'reason'},
                                      'full'    => $xref->{'reason_full'}
      };
    }
  }

  # An explicit mode keeps IO::File from interpreting characters in the
  # file name as part of the open mode.
  my $fh = IO::File->new( $filename, 'w' )
    or croak(
       sprintf( "Can not open '%s' for writing: %s", $filename, $! ) );

  log_progress( "Dumping for 'unmapped_reason' to '%s'\n", $filename );

  # Sorting on the full description makes the ID assignment reproducible.
  foreach my $reason (
         sort( { $a->{'full'} cmp $b->{'full'} } values(%reasons) ) )
  {
    # Assign 'unmapped_reason_id' to this reason.
    $reason->{'unmapped_reason_id'} = ++$unmapped_reason_id;

    $fh->printf( "%d\t%s\t%s\n", $reason->{'unmapped_reason_id'},
                 $reason->{'summary'}, $reason->{'full'} );

  }

  # Buffered write errors only surface when the handle is closed.
  $fh->close()
    or croak( sprintf( "Can not close '%s': %s", $filename, $! ) );

  log_progress("Dumping for 'unmapped_reason' done\n");

  # Assign reasons to the unmapped Xrefs from %reasons.
  foreach my $xref ( values( %{$unmapped} ) ) {
    $xref->{'reason'}      = $reasons{ $xref->{'reason_full'} };
    $xref->{'reason_full'} = undef;
  }

} ## end sub dump_unmapped_reason
692 
693 #-----------------------------------------------------------------------
694 
# dump_unmapped_object( $filename, $unmapped_object_id, $analysis_id,
#                       $unmapped )
#
# Writes one tab-separated line per unmapped Xref to $filename, in the
# form loaded into the 'unmapped_object' table, and stores the newly
# assigned ID in each Xref hash under the key 'unmapped_object_id'.
#
#   $filename            path of the dump file (overwritten).
#   $unmapped_object_id  last used unmapped_object_id; IDs are assigned
#                        from $unmapped_object_id + 1.
#   $analysis_id         analysis ID written on every line; '\N' is
#                        written when it is false.
#   $unmapped            hash reference of unmapped Xrefs.  The 'reason'
#                        of each Xref must already be the reason record
#                        set up by dump_unmapped_reason().
#
# Croaks if the file can not be opened, or if closing it fails.
sub dump_unmapped_object {
  my ( $filename, $unmapped_object_id, $analysis_id, $unmapped ) = @_;

  ######################################################################
  # Dump for 'unmapped_object'.                                        #
  ######################################################################

  # An explicit mode keeps IO::File from interpreting characters in the
  # file name as part of the open mode.
  my $fh = IO::File->new( $filename, 'w' )
    or croak(
       sprintf( "Can not open '%s' for writing: %s", $filename, $! ) );

  log_progress( "Dumping for 'unmapped_object' to '%s'\n", $filename );

  foreach my $xref ( values( %{$unmapped} ) ) {
    # Assign 'unmapped_object_id' to this Xref.
    $xref->{'unmapped_object_id'} = ++$unmapped_object_id;

    $fh->printf(
      "%d\t%s\t%s\t%d\t%s\t%d\t%s\t%s\t%s\t%s\t%s\n",
      $xref->{'unmapped_object_id'},
      'xref',
      $analysis_id || '\N',    # '\N' (NULL) means no analysis exists
                               # and uploading this table will fail.
      $xref->{'external_db_id'},
      $xref->{'accession'},
      $xref->{'reason'}->{'unmapped_reason_id'}, (
        defined( $xref->{'score'} )
        ? sprintf( "%.3f", $xref->{'score'} )
        : '\N'
      ),
      '\N',
      $xref->{'ensembl_id'} || '\N',
      ( defined( $xref->{'ensembl_id'} ) ? 'Transcript' : '\N' ),
      '\N' );
  }

  # Buffered write errors only surface when the handle is closed.
  $fh->close()
    or croak( sprintf( "Can not close '%s': %s", $filename, $! ) );

  log_progress("Dumping for 'unmapped_object' done\n");

} ## end sub dump_unmapped_object
734 
735 #-----------------------------------------------------------------------
736 
# upload_data( $table_name, $filename, $dbh )
#
# Loads the dump file $filename into the table $table_name through the
# database handle $dbh, using 'LOAD DATA LOCAL INFILE ... REPLACE'.
#
#   $table_name  name of the target table.  It is interpolated into the
#                statement unquoted, so it must be a trusted identifier.
#   $filename    path of the dump file; must be readable.
#   $dbh         DBI database handle.
#
# Croaks if the file is not readable, or if preparing or executing the
# statement fails.
sub upload_data {
  my ( $table_name, $filename, $dbh ) = @_;

  ######################################################################
  # Upload data from a file to a table.                                #
  ######################################################################

  if ( !-r $filename ) {
    croak( sprintf( "Can not open '%s' for reading", $filename ) );
  }

  log_progress( "Uploading for '%s' from '%s'\n",
                $table_name, $filename );

  my $sql =
    sprintf( "LOAD DATA LOCAL INFILE ? REPLACE INTO TABLE %s", $table_name );

  # The explicit checks matter when the handle does not have RaiseError
  # set: without them a failed load would still be reported as done.
  my $sth = $dbh->prepare($sql)
    or croak( sprintf( "Can not prepare upload for '%s': %s",
                       $table_name, $dbh->errstr() ) );

  $sth->execute($filename)
    or croak( sprintf( "Can not upload '%s' into '%s': %s",
                       $filename, $table_name, $sth->errstr() ) );

  log_progress( "Uploading for '%s' done\n", $table_name );

} ## end sub upload_data
761 
762 #-----------------------------------------------------------------------
763 
# log_progress( $fmt, @params )
#
# Formats @params with the sprintf() format $fmt and prints the result on
# STDERR, prefixed with 'COORD==> '.  No newline is added.
sub log_progress {
  my $format  = shift;
  my $message = sprintf( $format, @_ );

  # The message goes through '%s' so that any '%' it contains is printed
  # literally.
  printf( STDERR "COORD==> %s", $message );
}
768 
769 1;
Bio::EnsEMBL::Mapper::RangeRegistry
Definition: RangeRegistry.pm:51
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
accession
public accession()
XrefMapper::CoordinateMapper
Definition: CoordinateMapper.pm:8
Bio::EnsEMBL::Mapper::RangeRegistry::overlap_size
public Int overlap_size()
Bio::EnsEMBL::DBSQL::DBAdaptor::new
public Bio::EnsEMBL::DBSQL::DBAdaptor new()
XrefMapper::BasicMapper::get_species_id_from_species_name
public get_species_id_from_species_name()
Bio::EnsEMBL::Mapper::RangeRegistry::new
public Bio::EnsEMBL::Mapper::RangeRegistry new()
Bio::EnsEMBL::Mapper::RangeRegistry::check_and_register
public Undef check_and_register()