ensembl-hive  2.7.0
ConversionSupport.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::Utils::ConversionSupport - Utility module for Vega release and
34 schema conversion scripts
35 
36 =head1 SYNOPSIS
37 
38  my $serverroot = '/path/to/ensembl';
39  my $support = new Bio::EnsEMBL::Utils::ConversionSupport($serverroot);
40 
41  # parse common options
42  $support->parse_common_options;
43 
44  # parse extra options for your script
45  $support->parse_extra_options( 'string_opt=s', 'numeric_opt=n' );
46 
47  # ask user if he wants to run script with these parameters
48  $support->confirm_params;
49 
50  # see individual method documentation for more stuff
51 
52 =head1 DESCRIPTION
53 
54 This module is a collection of common methods and provides helper
55 functions for the Vega release and schema conversion scripts. Amongst
56 others, it reads options from a config file, parses commandline options
57 and does logging.
58 
59 =head1 METHODS
60 
61 =cut
62 
63 package Bio::EnsEMBL::Utils::ConversionSupport;
64 
65 use strict;
66 use warnings;
67 no warnings 'uninitialized';
68 
69 use Getopt::Long;
70 use Text::Wrap;
71 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
72 use FindBin qw($Bin $Script);
73 use POSIX qw(strftime);
74 use Cwd qw(abs_path);
75 use DBI;
76 use Data::Dumper;
77 use Fcntl qw(:flock SEEK_END);
78 
79 my $species_c = 1; #counter to be used for each database connection made
80 
81 =head2 new
82 
83  Arg[1] : String $serverroot - root directory of your ensembl sandbox
84  Example : my $support = new Bio::EnsEMBL::Utils::ConversionSupport(
85  '/path/to/ensembl');
86  Description : constructor
87  Return type : Bio::EnsEMBL::Utils::ConversionSupport object
88  Exceptions : thrown if no serverroot is provided
89  Caller : general
90 
91 =cut
92 
93 sub new {
94  my $class = shift;
95  (my $serverroot = shift) or throw("You must supply a serverroot.");
96  my $self = {
97  '_serverroot' => $serverroot,
98  '_param' => { interactive => 1 },
99  '_warnings' => 0,
100  };
101  bless ($self, $class);
102  return $self;
103 }
104 
105 =head2 parse_common_options
106 
107  Example : $support->parse_common_options;
108  Description : This method reads options from a configuration file and parses
109  some commandline options that are common to all scripts (like
110  db connection settings, help, dry-run). Commandline options
111  will override config file settings.
112 
113  All options will be accessible via $self->param('name').
114  Return type : true on success
115  Exceptions : thrown if configuration file can't be opened
116  Caller : general
117 
118 =cut
119 
120 sub parse_common_options {
121  my $self = shift;
122 
123  # read commandline options
124  my %h;
125  Getopt::Long::Configure("pass_through");
126  &GetOptions( \%h,
127  'dbname|db_name=s',
128  'host|dbhost|db_host=s',
129  'port|dbport|db_port=n',
130  'user|dbuser|db_user=s',
131  'pass|dbpass|db_pass=s',
132  'prod_dbname=s',
133  'prod_host=s',
134  'prod_port=n',
135  'prod_user=s',
136  'prod_pass=s',
137  'conffile|conf=s',
138  'logfile|log=s',
139  'nolog',
140  'logpath=s',
141  'log_base_path=s',
142  'logappend|log_append',
143  'verbose|v',
144  'interactive|i=s',
145  'hideparamlist=s',
146  'joblog=s',
147  'dry_run|dry|n',
148  'help|h|?',
149  );
150 
151  # reads config file
152  my $conffile = $h{'conffile'} || $self->serverroot . "/sanger-plugins/vega/conf/ini-files/Conversion.ini";
153  $conffile = abs_path($conffile);
154  if (-e $conffile) {
155  open(my $fh, '<', $conffile) or throw(
156  "Unable to open configuration file $conffile for reading: $!");
157  my $serverroot = $self->serverroot;
158  while (<$fh>) {
159  chomp;
160 
161  # remove comments
162  s/^[#;].*//;
163  s/\s+[;].*$//;
164 
165  # read options into internal parameter datastructure, removing whitespace
166  next unless (/(\w\S*)\s*=\s*(\S*)\s*/);
167  my $name = $1;
168  my $val = $2;
169  if ($val =~ /\$SERVERROOT/) {
170  $val =~ s/\$SERVERROOT/$serverroot/g;
171  $val = abs_path($val);
172  }
173  $self->param($name, $val);
174  }
175  close $fh;
176  $self->param('conffile', $conffile);
177  }
178  elsif ($conffile) {
179  warning("Unable to open configuration file $conffile for reading: $!");
180  }
181 
182 # override configured parameter with commandline options
183  map { $self->param($_, $h{$_}) } keys %h;
184 
185  return (1) if $self->param('nolog');
186 
187  # if logpath & logfile are not set, set them here to /ensemblweb/vega_dev/shared/logs/conversion/DBNAME/SCRIPNAME_NN.log
188  if (! defined($self->param('log_base_path'))) {
189  $self->param('log_base_path','/ensemblweb/shared/logs/conversion/');
190  }
191  my $dbname = $self->param('dbname');
192  $dbname =~ s/^vega_//;
193  if (not (defined($self->param('logpath')) )){
194  $self->param('logpath', $self->param('log_base_path')."/".$dbname."/" );
195  }
196  if ( not defined $self->param('logfile') ){
197  my $log = $Script;
198  $log =~ s/.pl$//g;
199  my $counter;
200  for ($counter=1 ; (-e $self->param('logpath')."/".$log."_".sprintf("%03d", $counter).".log"); $counter++){
201  # warn $self->param('logpath')."/".$log."_".$counter.".log";
202  }
203  $self->param('logfile', $log."_".sprintf("%03d", $counter).".log");
204  }
205  return(1);
206 }
207 
208 =head2 parse_extra_options
209 
210  Arg[1-N] : option descriptors that will be passed on to Getopt::Long
211  Example : $support->parse_extra_options('string_opt=s', 'numeric_opt=n');
212  Description : Parse extra commandline options by passing them on to
213  Getopt::Long and storing parameters in $self->param('name).
214  If the last option is '...' then unknown options are kept
215  in @ARGV for future calls to this method. Without '...'
216  then it is an error to have further options.
217  Return type : true on success
218  Exceptions : none (caugth by $self->error)
219  Caller : general
220 
221 =cut
222 
223 sub parse_extra_options {
224  my ($self, @params) = @_;
225  if(@params and $params[-1] eq "...") {
226  pop @params;
227  Getopt::Long::Configure("pass_through");
228  } else {
229  Getopt::Long::Configure("no_pass_through");
230  }
231  eval {
232  # catch warnings to pass to $self->error
233  local $SIG{__WARN__} = sub { die @_; };
234  &GetOptions(\%{ $self->{'_param'} }, @params);
235  };
236  $self->error($@) if $@;
237  return(1);
238 }
239 
240 =head2 allowed_params
241 
242  Arg[1-N] : (optional) List of allowed parameters to set
243  Example : my @allowed = $self->allowed_params(qw(param1 param2));
244  Description : Getter/setter for allowed parameters. This is used by
245  $self->confirm_params() to avoid cluttering of output with
246  conffile entries not relevant for a given script. You can use
247  $self->get_common_params() as a shortcut to set them.
248  Return type : Array - list of allowed parameters
249  Exceptions : none
250  Caller : general
251 
252 =cut
253 
254 sub allowed_params {
255  my $self = shift;
256 
257  # setter
258  if (@_) {
259  @{ $self->{'_allowed_params'} } = @_;
260  }
261 
262  # getter
263  if (ref($self->{'_allowed_params'}) eq 'ARRAY') {
264  return @{ $self->{'_allowed_params'} };
265  } else {
266  return ();
267  }
268 }
269 
270 =head2 get_common_params
271 
272  Example : my @allowed_params = $self->get_common_params, 'extra_param';
273  Description : Returns a list of commonly used parameters in the conversion
274  scripts. Shortcut for setting allowed parameters with
275  $self->allowed_params().
276  Return type : Array - list of common parameters
277  Exceptions : none
278  Caller : general
279 
280 =cut
281 
282 sub get_common_params {
283  return qw(
284  conffile
285  dbname
286  host
287  port
288  user
289  pass
290  nolog
291  logpath
292  log_base_path
293  logfile
294  logappend
295  verbose
296  interactive
297  hideparamlist
298  joblog
299  dry_run
300  );
301 }
302 
303 =head2 get_loutre_params
304 
305  Arg : (optional) return a list to parse or not
306  Example : $support->parse_extra_options($support->get_loutre_params('parse'))
307  Description : Returns a list of commonly used loutre db parameters - parse option is
308  simply used to distinguish between reporting and parsing parameters
309  Return type : Array - list of common parameters
310  Exceptions : none
311  Caller : general
312 
313 =cut
314 
315 sub get_loutre_params {
316  my ($self,$p) = @_;
317  if ($p) {
318  return qw(
319  loutrehost=s
320  loutreport=s
321  loutreuser=s
322  loutrepass:s
323  loutredbname=s
324  );
325  }
326  else {
327  return qw(
328  loutrehost
329  loutreport
330  loutreuser
331  loutrepass
332  loutredbname
333  );
334  }
335 }
336 
337 =head2 get_annotrack_params
338 
339  Arg : (optional) return a list to parse or not
340  Example : $support->parse_extra_options($support->get_annotrack_params('parse'))
341  Description : Returns a list of parameters used to connect to annotrack
342  Return type : Array - list of common parameters
343  Exceptions : none
344  Caller : general
345 
346 =cut
347 
348 sub get_annotrack_params {
349  my ($self,$p) = @_;
350  if ($p) {
351  return qw(
352  annotrackhost=s
353  annotrackport=s
354  annotrackuser=s
355  annotrackpass=s
356  annotrackdbname=s
357  ignoreannotrack
358  );
359  }
360  else {
361  return qw(
362  annotrackhost
363  annotrackport
364  annotrackuser
365  annotrackpass
366  annotrackdbname
367  ignoreannotrack
368  );
369  }
370 }
371 
372 =head2 remove_vega_params
373 
374  Example : $support->remove_vega_params;
375  Description : Removes Vega db conection parameters. Usefull to avoid clutter in log files when
376  working exclusively with loutre
377  Return type : none
378  Exceptions : none
379  Caller : general
380 
381 =cut
382 
383 sub remove_vega_params {
384  my $self = shift;
385  foreach my $param (qw(dbname host port user pass)) {
386  $self->{'_param'}{$param} = undef;
387  }
388 }
389 
390 =head2 confirm_params
391 
392  Example : $support->confirm_params;
393  Description : Prints a table of parameters that were collected from config
394  file and commandline and asks user to confirm if he wants
395  to proceed.
396  Return type : true on success
397  Exceptions : none
398  Caller : general
399 
400 =cut
401 
402 sub confirm_params {
403  my $self = shift;
404 
405  # print parameter table
406  return 1 if($self->param('hideparamlist') and not $self->param('interactive'));
407  print "Running script with these parameters:\n\n";
408  print $self->list_all_params;
409 
410  if ($self->param('host') eq 'ensweb-1-10') {
411  # ask user if he wants to proceed
412  exit unless $self->user_proceed("**************\n\n You're working on ensdb-1-10! Is that correct and you want to continue ?\n\n**************");
413  }
414  else {
415  # ask user if he wants to proceed
416  exit unless $self->user_proceed("Continue?");
417  }
418  return(1);
419 }
420 
421 =head2 list_all_params
422 
423  Example : print LOG $support->list_all_params;
424  Description : prints a table of the parameters used in the script
425  Return type : String - the table to print
426  Exceptions : none
427  Caller : general
428 
429 =cut
430 
431 sub list_all_params {
432  my $self = shift;
433  my $txt = sprintf " %-25s%-110s\n", qw(PARAMETER VALUE);
434  $txt .= " " . "-"x136 . "\n";
435  $Text::Wrap::columns = 130;
436  my @params = $self->allowed_params;
437  foreach my $key (@params) {
438  my @vals = $self->param($key);
439  if (@vals) {
440  $txt .= Text::Wrap::wrap( sprintf(' %-25s', $key),
441  ' 'x28,
442  join(", ", @vals)
443  ) . "\n";
444  }
445  }
446  $txt .= "\n";
447  return $txt;
448 }
449 
450 =head2 create_commandline_options
451 
452  Arg[1] : Hashref $settings - hashref describing what to do
453  Allowed keys:
454  allowed_params => 0|1 # use all allowed parameters
455  exclude => [] # listref of parameters to exclude
456  replace => {param => newval} # replace value of param with
457  # newval
458  Example : $support->create_commandline_options({
459  allowed_params => 1,
460  exclude => ['verbose'],
461  replace => { 'dbname' => 'homo_sapiens_vega_33_35e' }
462  });
463  Description : Creates a commandline options string that can be passed to any
464  other script using ConversionSupport.
465  Return type : String - commandline options string
466  Exceptions : none
467  Caller : general
468 
469 =cut
470 
471 sub create_commandline_options {
472  my ($self, $settings, $param_hash) = @_;
473  my %param_hash = $param_hash ? %$param_hash : ();
474 
475  # get all allowed parameters
476  if ($settings->{'allowed_params'}) {
477  # exclude params explicitly stated
478  my %exclude = map { $_ => 1 } @{ $settings->{'exclude'} || [] };
479  foreach my $param ($self->allowed_params) {
480  unless ($exclude{$param}) {
481  my ($first, @rest) = $self->param($param);
482  next unless (defined($first));
483 
484  if (@rest) {
485  $first = join(",", $first, @rest);
486  }
487  $param_hash{$param} = $first;
488  }
489  }
490  }
491 
492  # replace values
493  foreach my $key (keys %{ $settings->{'replace'} || {} }) {
494  $param_hash{$key} = $settings->{'replace'}->{$key};
495  }
496 
497  # create the commandline options string
498  my $options_string;
499  foreach my $param (keys %param_hash) {
500  $options_string .= sprintf("--%s %s ", $param, $param_hash{$param});
501  }
502  return $options_string;
503 }
504 
505 =head2 check_required_params
506 
507  Arg[1-N] : List @params - parameters to check
508  Example : $self->check_required_params(qw(dbname host port));
509  Description : Checks $self->param to make sure the requested parameters
510  have been set. Dies if parameters are missing.
511  Return type : true on success
512  Exceptions : none
513  Caller : general
514 
515 =cut
516 
517 sub check_required_params {
518  my ($self, @params) = @_;
519  my @missing = ();
520  foreach my $param (@params) {
521  push @missing, $param unless defined $self->param($param);
522  }
523  if (@missing) {
524  throw("Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
525  }
526  return(1);
527 }
528 
529 =head2 user_proceed
530 
531  Arg[1] : (optional) String $text - notification text to present to user
532  Example : # run a code snipped conditionally
533  if ($support->user_proceed("Run the next code snipped?")) {
534  # run some code
535  }
536 
537  # exit if requested by user
538  exit unless ($support->user_proceed("Want to continue?"));
539  Description : If running interactively, the user is asked if he wants to
540  perform a script action. If he doesn't, this section is skipped
541  and the script proceeds with the code. When running
542  non-interactively, the section is run by default.
543  Return type : TRUE to proceed, FALSE to skip.
544  Exceptions : none
545  Caller : general
546 
547 =cut
548 
549 sub user_proceed {
550  my ($self, $text) = @_;
551 
552  if ($self->param('interactive')) {
553  print "$text\n" if $text;
554  print "[y/N] ";
555  my $input = lc(<>);
556  chomp $input;
557  unless ($input eq 'y') {
558  print "Skipping.\n";
559  return(0);
560  }
561  }
562 
563  return(1);
564 }
565 
566 
567 =head2 read_user_input
568 
569  Arg[1] : (optional) String $text - notification text to present to user
570  Example : my $ret = $support->read_user_input("Choose a number [1/2/3]");
571  if ($ret == 1) {
572  # do something
573  } elsif ($ret == 2) {
574  # do something else
575  }
576  Description : If running interactively, the user is asked for input.
577  Return type : String - user's input
578  Exceptions : none
579  Caller : general
580 
581 =cut
582 
583 sub read_user_input {
584  my ($self, $text) = @_;
585 
586  if ($self->param('interactive')) {
587  print "$text\n" if $text;
588  my $input = <>;
589  chomp $input;
590  return $input;
591  }
592 }
593 
594 =head2 comma_to_list
595 
596  Arg[1-N] : list of parameter names to parse
597  Example : $support->comma_to_list('chromosomes');
598  Description : Transparently converts comma-separated lists into arrays (to
599  allow different styles of commandline options, see perldoc
600  Getopt::Long for details). Parameters are converted in place
601  (accessible through $self->param('name')).
602  Return type : true on success
603  Exceptions : none
604  Caller : general
605 
606 =cut
607 
608 sub comma_to_list {
609  my $self = shift;
610  foreach my $param (@_) {
611  $self->param($param,
612  split (/,/, join (',', $self->param($param))));
613  }
614  return(1);
615 }
616 
617 =head2 list_or_file
618 
619  Arg[1] : Name of parameter to parse
620  Example : $support->list_or_file('gene');
621  Description : Determines whether a parameter holds a list or it is a filename
622  to read the list entries from.
623  Return type : true on success
624  Exceptions : thrown if list file can't be opened
625  Caller : general
626 
627 =cut
628 
629 sub list_or_file {
630  my ($self, $param) = @_;
631  my @vals = $self->param($param);
632  return unless (@vals);
633 
634  my $firstval = $vals[0];
635  if (scalar(@vals) == 1 && -e $firstval) {
636  # we didn't get a list of values, but a file to read values from
637  @vals = ();
638  open(my $fh, '<', $firstval) or throw("Cannot open $firstval for reading: $!");
639  while(<$fh>){
640  chomp;
641  push(@vals, $_);
642  }
643  close($fh);
644  $self->param($param, @vals);
645  }
646  $self->comma_to_list($param);
647  return(1);
648 }
649 
650 =head2 param
651 
652  Arg[1] : Parameter name
653  Arg[2-N] : (optional) List of values to set
654  Example : my $dbname = $support->param('dbname');
655  $support->param('port', 3306);
656  $support->param('chromosomes', 1, 6, 'X');
657  Description : Getter/setter for parameters. Accepts single-value params and
658  list params.
659  Return type : Scalar value for single-value parameters, array of values for
660  list parameters
661  Exceptions : thrown if no parameter name is supplied
662  Caller : general
663 
664 =cut
665 
666 sub param {
667  my $self = shift;
668  my $name = shift or throw("You must supply a parameter name");
669 
670  # setter
671  if (@_) {
672  if (scalar(@_) == 1) {
673  # single value
674  $self->{'_param'}->{$name} = shift;
675  } else {
676  # list of values
677  undef $self->{'_param'}->{$name};
678  @{ $self->{'_param'}->{$name} } = @_;
679  }
680  }
681 
682  # getter
683  if (ref($self->{'_param'}->{$name}) eq 'ARRAY') {
684  # list parameter
685  return @{ $self->{'_param'}->{$name} };
686  } elsif (defined($self->{'_param'}->{$name})) {
687  # single-value parameter
688  return $self->{'_param'}->{$name};
689  } else {
690  return ();
691  }
692 }
693 
694 =head2 error
695 
696  Arg[1] : (optional) String - error message
697  Example : $support->error("An error occurred: $@");
698  exit(0) if $support->error;
699  Description : Getter/setter for error messages
700  Return type : String - error message
701  Exceptions : none
702  Caller : general
703 
704 =cut
705 
706 sub error {
707  my $self = shift;
708  $self->{'_error'} = shift if (@_);
709  return $self->{'_error'};
710 }
711 
712 =head2 warnings
713 
714  Example : print LOG "There were ".$support->warnings." warnings.\n";
715  Description : Returns the number of warnings encountered while running the
716  script (the warning counter is increased by $self->log_warning).
717  Return type : Int - number of warnings
718  Exceptions : none
719  Caller : general
720 
721 =cut
722 
723 sub warnings {
724  my $self = shift;
725  return $self->{'_warnings'};
726 }
727 
728 =head2 serverroot
729 
730  Arg[1] : (optional) String - root directory of your ensembl sandbox
731  Example : my $serverroot = $support->serverroot;
732  Description : Getter/setter for the root directory of your ensembl sandbox.
733  This is set when ConversionSupport object is created, so
734  usually only used as a getter.
735  Return type : String - the server root directory
736  Exceptions : none
737  Caller : general
738 
739 =cut
740 
741 sub serverroot {
742  my $self = shift;
743  $self->{'_serverroot'} = shift if (@_);
744  return $self->{'_serverroot'};
745 }
746 
747 =head2 get_database
748 
749  Arg[1] : String $database - the type of database to connect to
750  (eg core, otter)
751  Arg[2] : (optional) String $prefix - the prefix used for retrieving the
752  connection settings from the configuration
753  Example : my $db = $support->get_database('core');
754  Description : Connects to the database specified.
755  Return type : DBAdaptor of the appropriate type
756  Exceptions : thrown if asking for unknown database
757  Caller : general
758 
759 =cut
760 
761 sub get_database {
762  my $self = shift;
763  my $database = shift or throw("You must provide a database");
764  my $prefix = shift || '';
765  $self->check_required_params(
766  "${prefix}host",
767  "${prefix}port",
768  "${prefix}user",
769  "${prefix}dbname",
770  );
771  my %adaptors = (
772  core => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
773  ensembl => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
774  evega => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
775  otter => 'Bio::Otter::DBSQL::DBAdaptor',
776  vega => 'Bio::Otter::DBSQL::DBAdaptor',
777  compara => 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor',
778  loutre => 'Bio::Vega::DBSQL::DBAdaptor',
779  );
780  throw("Unknown database: $database") unless $adaptors{$database};
781 
782  $self->dynamic_use($adaptors{$database});
783  my $species = 'species' . $species_c;
784  my $dba = $adaptors{$database}->new(
785  -host => $self->param("${prefix}host"),
786  -port => $self->param("${prefix}port"),
787  -user => $self->param("${prefix}user"),
788  -pass => $self->param("${prefix}pass") || '',
789  -dbname => $self->param("${prefix}dbname"),
790  -group => $database,
791  -species => $species,
792  );
793  #can use this approach to get dna from another db
794 # my $dna_db = $adaptors{$database}->new(
795 # -host => 'otterlive',
796 # -port => '3301',
797 # -user => $self->param("${prefix}user"),
798 # -pass => $self->param("${prefix}pass"),
799 # -dbname => 'loutre_human',
800 # );
801 # $dba->dnadb($dna_db);
802 
803  # otherwise explicitely set the dnadb to itself - by default the Registry assumes
804  # a group 'core' for this now
805  $dba->dnadb($dba);
806 
807  $species_c++;
808 
809  $self->{'_dba'}->{$database} = $dba;
810  $self->{'_dba'}->{'default'} = $dba unless $self->{'_dba'}->{'default'};
811  return $self->{'_dba'}->{$database};
812 }
813 
814 
815 =head2 get_dbconnection
816 
817  Arg[1] : (optional) String $prefix - the prefix used for retrieving the
818  connection settings from the configuration
819  Example : my $dbh = $self->get_dbconnection;
820  Description : Connects to the database server specified. You don't have to
821  specify a database name (this is useful for running commands
822  like $dbh->do('show databases')).
823  Return type : DBI database handle
824  Exceptions : thrown if connection fails
825  Caller : general
826  Status : At Risk
827 
828 =cut
829 
830 sub get_dbconnection {
831  my $self = shift;
832  my $prefix = shift;
833 
834  $self->check_required_params(
835  "${prefix}host",
836  "${prefix}port",
837  "${prefix}user",
838  );
839 
840  my $dsn = "DBI:" . ($self->param('driver')||'mysql') .
841  ":host=" . $self->param("${prefix}host") .
842  ";port=" . $self->param("${prefix}port");
843 
844  if ($self->param("${prefix}dbname")) {
845  $dsn .= ";dbname=".$self->param("${prefix}dbname");
846  }
847 
848 # warn $dsn;
849 
850  my $dbh;
851  my $user = $self->param("${prefix}user");
852  my $pass = $self->param("${prefix}pass");
853  eval{
854  $dbh = DBI->connect($dsn, $user, $pass, {'RaiseError' => 1, 'PrintError' => 0});
855  };
856 
857  if (!$dbh || $@ || !$dbh->ping) {
858  $self->log_error("Could not connect to db server as user ".
859  $self->param("${prefix}user") .
860  " using [$dsn] as a locator:\n" . $DBI::errstr . $@);
861  }
862 
863  $self->{'_dbh'} = $dbh;
864  return $self->{'_dbh'};
865 
866 }
867 
868 
869 =head2 dba
870 
871  Arg[1] : (optional) String $database - type of db apaptor to retrieve
872  Example : my $dba = $support->dba;
873  Description : Getter for database adaptor. Returns default (i.e. created
874  first) db adaptor if no argument is provided.
875  Return type : Bio::EnsEMBL::DBSQL::DBAdaptor or Bio::Otter::DBSQL::DBAdaptor
876  Exceptions : none
877  Caller : general
878 
879 =cut
880 
881 sub dba {
882  my ($self, $database) = @_;
883  return $self->{'_dba'}->{$database} || $self->{'_dba'}->{'default'};
884 }
885 
886 =head2 dynamic_use
887 
888  Arg [1] : String $classname - The name of the class to require/import
889  Example : $self->dynamic_use('Bio::EnsEMBL::DBSQL::DBAdaptor');
890  Description: Requires and imports the methods for the classname provided,
891  checks the symbol table so that it doesnot re-require modules
892  that have already been required.
893  Returntype : true on success
894  Exceptions : Warns to standard error if module fails to compile
895  Caller : internal
896 
897 =cut
898 
899 sub dynamic_use {
900  my ($self, $classname) = @_;
901  my ($parent_namespace, $module) = $classname =~/^(.*::)(.*)$/ ? ($1,$2) : ('::', $classname);
902 
903  no strict 'refs'; ## no critic
904  # return if module has already been imported
905  return 1 if $parent_namespace->{$module.'::'} && %{ $parent_namespace->{$module.'::'}||{} };
906 
907  eval "require $classname"; ## no critic
908  throw("Failed to require $classname: $@") if ($@);
909  $classname->import();
910 
911  return 1;
912 }
913 
914 =head2 get_chrlength
915 
916  Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
917  Arg[2] : (optional) String $version - coord_system version
918  Arg[3] : (optional) String $type - type of region eg chromsome (defaults to 'toplevel')
919  Arg[4] : (optional) Boolean - return non reference slies as well (required for haplotypes eq 6-COX)
920  Arg[5] : (optional) Override chromosome parameter filtering with this array reference. Empty denotes all.
921  Example : my $chr_length = $support->get_chrlength($dba);
922  Description : Get all chromosomes and their length from the database. Return
923  chr_name/length for the chromosomes the user requested (or all
924  chromosomes by default)
925  Return type : Hashref - chromosome_name => length
926  Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
927  Caller : general
928 
929 =cut
930 
931 sub get_chrlength {
932  my ($self, $dba, $version,$type,$include_non_reference,$chroms) = @_;
933  $dba ||= $self->dba;
934  $type ||= 'toplevel';
935 
936  throw("get_chrlength should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n")
937  unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));
938 
939  my $sa = $dba->get_SliceAdaptor;
940 
941  my @chromosomes = map { $_->seq_region_name }
942  @{ $sa->fetch_all($type,$version,$include_non_reference) };
943  my %chr = map { $_ => $sa->fetch_by_region($type, $_, undef, undef, undef, $version)->length } @chromosomes;
944 
945  my @wanted = $self->param('chromosomes');
946  @wanted = @$chroms if defined $chroms and ref($chroms) eq 'ARRAY';
947 
948  if (@wanted) {
949  # check if user supplied invalid chromosome names
950  foreach my $chr (@wanted) {
951  my $found = 0;
952  foreach my $chr_from_db (keys %chr) {
953  if ($chr_from_db eq $chr) {
954  $found = 1;
955  last;
956  }
957  }
958  unless ($found) {
959  #if you get this when using ensembl-vega then the dbname will be misleading.
960  warning("Didn't find chromosome $chr in database " .
961  $self->param('dbname'));
962  }
963  }
964 
965  # filter to requested chromosomes only
966  HASH:
967  foreach my $chr_from_db (keys %chr) {
968  foreach my $chr (@wanted) {
969  if ($chr_from_db eq $chr) {
970  next HASH;
971  }
972  }
973  delete($chr{$chr_from_db});
974  }
975  }
976 
977  return \%chr;
978 }
979 
980 =head2 get_ensembl_chr_mapping
981 
982  Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
983  Arg[2] : (optional) String $version - coord_system version
984  Example : my $ensembl_mapping = $support->get_ensembl_chr_mapping($dba);
985  Description : Gets a mapping between Vega chromosome names and their
986  equivalent Ensembl chromosomes. Works with non-reference chromosomes
987  Return type : Hashref - Vega name => Ensembl name
988  Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
989  Caller : general
990 
991 =cut
992 
993 sub get_ensembl_chr_mapping {
994  my ($self, $dba, $version) = @_;
995  $dba ||= $self->dba;
996  throw("get_ensembl_chr_mapping should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n") unless ($dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));
997 
998  my $sa = $dba->get_SliceAdaptor;
999  my @chromosomes = map { $_->seq_region_name }
1000  @{ $sa->fetch_all('chromosome', $version, 1) };
1001 
1002  my %chrs;
1003  foreach my $chr (@chromosomes) {
1004  my $sr = $sa->fetch_by_region('chromosome', $chr, undef, undef, undef, $version);
1005  my ($ensembl_name_attr) = @{ $sr->get_all_Attributes('ensembl_name') };
1006  if ($ensembl_name_attr) {
1007  $chrs{$chr} = $ensembl_name_attr->value;
1008  } else {
1009  $chrs{$chr} = $chr;
1010  }
1011  }
1012  return \%chrs;
1013 }
1014 
1015 =head2 get_taxonomy_id
1016 
1017  Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
1018  Example : my $sid = $support->get_taxonony_id($dba);
1019  Description : Retrieves the taxononmy ID from the meta table
1020  Return type : Int - the taxonomy ID
1021  Exceptions : thrown if no taxonomy ID is found in the database
1022  Caller : general
1023 
1024 =cut
1025 
1026 sub get_taxonomy_id {
1027  my ($self, $dba) = @_;
1028  $dba ||= $self->dba;
1029  my $sql = 'SELECT meta_value FROM meta WHERE meta_key = "species.taxonomy_id"';
1030  my $sth = $dba->dbc->db_handle->prepare($sql);
1031  $sth->execute;
1032  my ($tid) = $sth->fetchrow_array;
1033  $sth->finish;
1034  $self->throw("Could not determine taxonomy_id from database.") unless $tid;
1035  return $tid;
1036 }
1037 
1038 =head2 get_species_scientific_name
1039 
1040  Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
1041  Example : my $species = $support->get_species_scientific_name($dba);
1042  Description : Retrieves the species scientific name (Genus species) from the
1043  meta table
1044  Return type : String - species scientific name
1045  Exceptions : thrown if species name can not be determined from db
1046  Caller : general
1047 
1048 =cut
1049 
1050 sub get_species_scientific_name {
1051  my ($self, $dba) = @_;
1052  $dba ||= $self->dba;
1053  my $sql = "SELECT meta_value FROM meta WHERE meta_key = \'species.scientific_name\'";
1054  my $sth = $dba->dbc->db_handle->prepare($sql);
1055  $sth->execute;
1056  my @sp;
1057  while (my @row = $sth->fetchrow_array) {
1058  push @sp, $row[0];
1059  }
1060  if (! @sp || @sp > 1) {
1061  $self->throw("Could not retrieve a single species scientific name from database.");
1062  }
1063  $sth->finish;
1064  my $species = $sp[0];
1065  $species =~ s/ /_/g;
1066  return $species;
1067 }
1068 
1069 =head2 species
1070 
1071  Arg[1] : (optional) String $species - species name to set
1072  Example : my $species = $support->species;
1073  my $url = "http://vega.sanger.ac.uk/$species/";
1074  Description : Getter/setter for species name (Genus_species). If not set, it's
1075  determined from database's meta table
1076  Return type : String - species name
1077  Exceptions : none
1078  Caller : general
1079 
1080 =cut
1081 
1082 sub species {
1083  my $self = shift;
1084  $self->{'_species'} = shift if (@_);
1085  # get species name from database if not set
1086  unless ($self->{'_species'}) {
1087  $self->{'_species'} = $self->get_species_scientific_name;
1088  }
1089  return $self->{'_species'};
1090 }
1091 
1092 =head2 sort_chromosomes
1093 
1094  Arg[1] : (optional) Hashref $chr_hashref - Hashref with chr_name as keys
1095  Example : my $chr = { '6-COX' => 1, '1' => 1, 'X' => 1 };
1096  my @sorted = $support->sort_chromosomes($chr);
1097  Description : Sorts chromosomes in an intuitive way (numerically, then
1098  alphabetically). If no chromosome hashref is passed, it's
1099  retrieve by calling $self->get_chrlength()
1100  Return type : List - sorted chromosome names
1101  Exceptions : thrown if no hashref is provided
1102  Caller : general
1103 
1104 =cut
1105 
1106 sub sort_chromosomes {
1107  my ($self, $chr_hashref, $version, $include_non_reference) = @_;
1108  $chr_hashref = $self->get_chrlength($self->dba, $version, 'chromosome', $include_non_reference) unless ($chr_hashref);
1109  throw("You have to pass a hashref of your chromosomes")
1110  unless ($chr_hashref and ref($chr_hashref) eq 'HASH');
1111  return (sort _by_chr_num keys %$chr_hashref);
1112 }
1113 
1114 =head2 _by_chr_num
1115 
1116  Example : my @sorted = sort _by_chr_num qw(X, 6-COX, 14, 7);
1117  Description : Subroutine to use in sort for sorting chromosomes. Sorts
1118  numerically, then alphabetically
1119  Return type : values to be used by sort
1120  Exceptions : none
1121  Caller : internal ($self->sort_chromosomes)
1122 
1123 =cut
1124 
1125 sub _by_chr_num {
1126  my @awords = split /-/, $a;
1127  my @bwords = split /-/, $b;
1128 
1129  my $anum = $awords[0];
1130  my $bnum = $bwords[0];
1131 
1132  if ($anum !~ /^[0-9]*$/) {
1133  if ($bnum !~ /^[0-9]*$/) {
1134  return $anum cmp $bnum;
1135  } else {
1136  return 1;
1137  }
1138  }
1139  if ($bnum !~ /^[0-9]*$/) {
1140  return -1;
1141  }
1142 
1143  if ($anum <=> $bnum) {
1144  return $anum <=> $bnum;
1145  } else {
1146  if ($#awords == 0) {
1147  return -1;
1148  } elsif ($#bwords == 0) {
1149  return 1;
1150  } else {
1151  return $awords[1] cmp $bwords[1];
1152  }
1153  }
1154 }
1155 
1156 =head2 split_chromosomes_by_size
1157 
1158  Arg[1] : (optional) Int $cutoff - the cutoff in bp between small and
1159  large chromosomes
1160  Arg[2] : (optional) Boolean to include duplicate regions, ie PAR or not
1161  (default is no)
1162  Arg[3] : (optional) Coordsystem version to retrieve
1163 
1164  Example : my $chr_slices = $support->split_chromosomes_by_size;
1165  foreach my $block_size (keys %{ $chr_slices }) {
1166  print "Chromosomes with blocksize $block_size: ";
1167  print join(", ", map { $_->seq_region_name }
1168  @{ $chr_slices->{$block_size} });
1169  }
1170  Description : Determines block sizes for storing DensityFeatures on
1171  chromosomes, and return slices for each chromosome. The block
1172  size is determined so that you have 150 bins for the smallest
1173  chromosome over 5 Mb in length. For chromosomes smaller than 5
1174  Mb, an additional smaller block size is used to yield 150 bins
1175  for the overall smallest chromosome. This will result in
1176  reasonable resolution for small chromosomes and high
1177  performance for big ones. Does not return non-reference seq_regions
1178  Return type : Hashref (key: block size; value: Arrayref of chromosome
1179  Bio::EnsEMBL::Slices)
1180  Exceptions : none
1181  Caller : density scripts
1182 
1183 =cut
1184 
1185 sub split_chromosomes_by_size {
1186  my $self = shift;
1187  my $cutoff = shift || 5000000;
1188  my $dup = shift || 0;
1189  my $cs_version = shift;
1190  my $include_non_reference = 1; #get non reference slices
1191  my $slice_adaptor = $self->dba->get_SliceAdaptor;
1192  my ($top_slices, $wanted_slices, $missed_slices);
1193  if ($self->param('chromosomes')) {
1194  foreach my $chr ($self->param('chromosomes')) {
1195  push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr);
1196  }
1197  }
1198  else {
1199  $top_slices = $slice_adaptor->fetch_all('chromosome',$cs_version,$include_non_reference,$dup);
1200  }
1201 
1202  # filter out patches, if present
1203  $wanted_slices = [ grep { $_->is_reference or $self->is_haplotype($_,$self->dba) } @$top_slices ];
1204 
1205  # make a note of which ones are excluded
1206  $missed_slices = [ grep { ! $_->is_reference and ! $self->is_haplotype($_,$self->dba) } @$top_slices ];
1207 
1208  #warn non PATCH toplevels slice that are excluded (could be haplotypes if an earlier stage has failed)
1209  foreach (@{$missed_slices || []}) {
1210  my $sr_name = $_->seq_region_name;
1211  if ($self->is_patch($_)) {
1212  $self->log("Excluding $sr_name\n");
1213  }
1214  else {
1215  $self->log_warning("Excluding $sr_name, are you sure ?\n");
1216  }
1217  }
1218 
1219  my ($big_chr, $small_chr, $min_big_chr, $min_small_chr);
1220  foreach my $slice (@{ $wanted_slices }) {
1221  next if ($slice->length eq 10000); #hack for chrY pseudoslice
1222  if ($slice->length < $cutoff) {
1223  if (! $min_small_chr or ($min_small_chr > $slice->length)) {
1224  $min_small_chr = $slice->length;
1225  }
1226  # push small chromosomes onto $small_chr
1227  push @{ $small_chr }, $slice;
1228  }
1229  elsif (! $min_big_chr or ($min_big_chr > $slice->length) ){
1230  $min_big_chr = $slice->length;
1231  }
1232  # push _all_ chromosomes onto $big_chr
1233  push @{ $big_chr }, $slice;
1234  }
1235  my $chr_slices;
1236  $chr_slices->{int($min_big_chr/150)} = $big_chr if $min_big_chr;
1237  $chr_slices->{int($min_small_chr/150)} = $small_chr if $min_small_chr;
1238  return $chr_slices;
1239 }
1240 
1241 =head2 is_patch
1242 
1243  Arg[1] : B::E::Slice
1244  Example : if ($support->is_patch($slice)) { ...
1245  Description : Looks at seq_region attributes to decide if a slice is a patch or not
1246  If PATCH seq_region_attrib is not there check to see if name suggests it is a PATCH
1247  Return type : true/false
1248 
1249 =cut
1250 
1251 sub is_patch {
1252  my ($self,$slice) = @_;
1253  my @patch_attrib_types = qw(patch_fix patch_novel); #seq_region_attribs used to define a patch
1254  foreach my $attrib_type (@patch_attrib_types) {
1255  if (@{$slice->get_all_Attributes($attrib_type)}) {
1256  return 1;
1257  }
1258  }
1259  return 0;
1260 }
1261 
1262 
1263 =head2 log
1264 
1265  Arg[1] : String $txt - the text to log
1266  Arg[2] : Int $indent - indentation level for log message
1267  Example : my $log = $support->log_filehandle;
1268  $support->log('Log foo.\n', 1);
1269  Description : Logs a message to the filehandle initialised by calling
1270  $self->log_filehandle(). You can supply an indentation level
1271  to get nice hierarchical log messages.
1272  Return type : true on success
1273  Exceptions : thrown when no filehandle can be obtained
1274  Caller : general
1275 
1276 =cut
1277 
1278 sub log {
1279  my ($self, $txt, $indent) = @_;
1280  $indent ||= 0;
1281 
1282  # strip off leading linebreaks so that indenting doesn't break
1283  $txt =~ s/^(\n*)//;
1284 
1285  $txt = $1." "x$indent . $txt;
1286  my $fh = $self->{'_log_filehandle'};
1287  throw("Unable to obtain log filehandle") unless $fh;
1288  print $fh "$txt";
1289  return(1);
1290 }
1291 
1292 =head2 lock_log
1293 
1294  Description : Use flock-style locks to lock log and fastforward to end.
1295  Useful if log is being written to by multiple processes.
1296 =cut
1297 
1298 sub lock_log {
1299  my ($self) = @_;
1300  ## no critic
1301  my $fh = $self->{'_log_filehandle'};
1302  return if -t $fh or -p $fh; # Shouldn't lock such things
1303  flock($self->{'_log_filehandle'},LOCK_EX) || return 0;
1304  seek($self->{'_log_filehandle'},0,SEEK_END); # fail ok, prob not reg file
1305  return 1;
1306 }
1307 
1308 =head2 unlock_log
1309 
1310  Description : Unlock log previously locked by lock_log.
1311 
1312 =cut
1313 
1314 sub unlock_log {
1315  my ($self) = @_;
1316  ## no critic
1317  my $fh = $self->{'_log_filehandle'};
1318  return if -t $fh or -p $fh; # We don't lock such things
1319  # flush is implicit in flock
1320  flock($self->{'_log_filehandle'},LOCK_UN) || return 0;
1321  return 1;
1322 }
1323 
1324 =head2 log_warning
1325 
1326  Arg[1] : String $txt - the warning text to log
1327  Arg[2] : Int $indent - indentation level for log message
1328  Arg[3] : Bool - add a line break before warning if true
1329  Example : my $log = $support->log_filehandle;
1330  $support->log_warning('Log foo.\n', 1);
1331  Description : Logs a message via $self->log and increases the warning counter.
1332  Return type : true on success
1333  Exceptions : none
1334  Caller : general
1335 
1336 =cut
1337 
1338 sub log_warning {
1339  my ($self, $txt, $indent, $break) = @_;
1340  $txt = "WARNING: " . $txt;
1341  $txt = "\n$txt" if ($break);
1342  $self->log($txt, $indent);
1343  $self->{'_warnings'}++;
1344  return(1);
1345 }
1346 
1347 =head2 log_error
1348 
1349  Arg[1] : String $txt - the error text to log
1350  Arg[2] : Int $indent - indentation level for log message
1351  Example : my $log = $support->log_filehandle;
1352  $support->log_error('Log foo.\n', 1);
1353  Description : Logs a message via $self->log and exits the script.
1354  Return type : none
1355  Exceptions : none
1356  Caller : general
1357 
1358 =cut
1359 
1360 sub log_error {
1361  my ($self, $txt, $indent) = @_;
1362  $txt = "ERROR: ".$txt;
1363  $self->log($txt, $indent);
1364  $self->log("Exiting.\n");
1365  exit;
1366 }
1367 
1368 =head2 log_verbose
1369 
1370  Arg[1] : String $txt - the warning text to log
1371  Arg[2] : Int $indent - indentation level for log message
1372  Example : my $log = $support->log_filehandle;
1373  $support->log_verbose('Log this verbose message.\n', 1);
1374  Description : Logs a message via $self->log if --verbose option was used
1375  Return type : TRUE on success, FALSE if not verbose
1376  Exceptions : none
1377  Caller : general
1378 
1379 =cut
1380 
1381 sub log_verbose {
1382  my ($self, $txt, $indent) = @_;
1383  return(0) unless $self->param('verbose');
1384  $self->log($txt, $indent);
1385  return(1);
1386 }
1387 
1388 =head2 log_stamped
1389 
1390  Arg[1] : String $txt - the warning text to log
1391  Arg[2] : Int $indent - indentation level for log message
1392  Example : my $log = $support->log_filehandle;
1393  $support->log_stamped('Log this stamped message.\n', 1);
1394  Description : Appends timestamp and memory usage to a message and logs it via
1395  $self->log
1396  Return type : TRUE on success
1397  Exceptions : none
1398  Caller : general
1399 
1400 =cut
1401 
1402 sub log_stamped {
1403  my ($self, $txt, $indent) = @_;
1404  # append timestamp and memory usage to log text
1405  $txt =~ s/(\n*)$//;
1406  $txt .= " ".$self->date_and_mem.$1;
1407  $self->log($txt, $indent);
1408  return(1);
1409 }
1410 
1411 =head2 log_filehandle
1412 
1413  Arg[1] : (optional) String $mode - file access mode
1414  Example : my $log = $support->log_filehandle;
1415  # print to the filehandle
1416  print $log 'Lets start logging...\n';
1417  # log via the wrapper $self->log()
1418  $support->log('Another log message.\n');
1419  Description : Returns a filehandle for logging (STDERR by default, logfile if
1420  set from config or commandline). You can use the filehandle
1421  directly to print to, or use the smart wrapper $self->log().
1422  Logging mode (truncate or append) can be set by passing the
1423  mode as an argument to log_filehandle(), or with the
1424  --logappend commandline option (default: truncate)
1425  Return type : Filehandle - the filehandle to log to
1426  Exceptions : thrown if logfile can't be opened
1427  Caller : general
1428 
1429 =cut
1430 
1431 sub log_filehandle {
1432  my ($self, $mode, $date) = @_;
1433  $mode ||= '>';
1434  $mode = '>>' if ($self->param('logappend'));
1435  my $fh = \*STDERR;
1436  if (my $logfile = $self->param('logfile')) {
1437  $logfile .= "_$date" if $date;
1438  if (my $logpath = $self->param('logpath')) {
1439  unless (-e $logpath) {
1440  system("mkdir $logpath") == 0 or
1441  $self->log_error("Can't create log dir $logpath: $!\n");
1442  }
1443  $logfile = "$logpath/$logfile";
1444  }
1445  open($fh, "$mode", $logfile) or throw(
1446  "Unable to open $logfile for writing: $!");
1447  }
1448  $self->{'_log_filehandle'} = $fh;
1449  return $self->{'_log_filehandle'};
1450 }
1451 
1452 =head2 filehandle
1453 
1454  Arg[1] : String $mode - file access mode
1455  Arg[2] : String $file - input or output file
1456  Example : my $fh = $support->filehandle('>>', '/path/to/file');
1457  # print to the filehandle
1458  print $fh 'Your text goes here...\n';
1459  Description : Returns a filehandle (*STDOUT for writing, *STDIN for reading
1460  by default) to print to or read from.
1461  Return type : Filehandle - the filehandle
1462  Exceptions : thrown if file can't be opened
1463  Caller : general
1464 
1465 =cut
1466 
1467 sub filehandle {
1468  my ($self, $mode, $file) = @_;
1469  $mode ||= ">";
1470  my $fh;
1471  if ($file) {
1472  open($fh, "$mode", $file) or throw(
1473  "Unable to open $file for writing: $!");
1474  } elsif ($mode =~ />/) {
1475  $fh = \*STDOUT;
1476  } elsif ($mode =~ /</) {
1477  $fh = \*STDIN;
1478  }
1479  return $fh;
1480 }
1481 
1482 =head2 init_log_date
1483 
1484  Example : $support->init_log_date;
1485  Description : Opens a filehandle to a logfile with the date in the file name
1486  Return type : Filehandle - the log filehandle
1487  Exceptions : none
1488  Caller : general
1489 
1490 =cut
1491 
1492 sub init_log_date {
1493  my $self = shift;
1494  my $date = $self->date;
1495  return $self->init_log($date);
1496 }
1497 
1498 =head2 init_log
1499 
1500  Example : $support->init_log;
1501  Description : Opens a filehandle to the logfile and prints some header
1502  information to this file. This includes script name, date, user
1503  running the script and parameters the script will be running
1504  with.
1505  Return type : Filehandle - the log filehandle
1506  Exceptions : none
1507  Caller : general
1508 
1509 =cut
1510 
1511 sub init_log {
1512  my $self = shift;
1513  my $date = shift;
1514 
1515  # get a log filehandle
1516  my $log = $self->log_filehandle(undef,$date);
1517 
1518  # print script name, date, user who is running it
1519  unless($self->param('hideparamlist')) {
1520  my $hostname = `hostname`;
1521  chomp $hostname;
1522  my $script = "$hostname:$Bin/$Script";
1523  my $user = `whoami`;
1524  chomp $user;
1525  $self->log("Script: $script\nDate: ".$self->date_and_time."\nUser: $user\n");
1526 
1527  # print parameters the script is running with
1528  $self->log("Parameters:\n\n");
1529  $self->log($self->list_all_params);
1530  }
1531  # remember start time
1532  $self->{'_start_time'} = time;
1533 
1534  return $log;
1535 }
1536 
1537 =head2 finish_log
1538 
1539  Example : $support->finish_log;
1540  Description : Writes footer information to a logfile. This includes the
1541  number of logged warnings, timestamp and memory footprint.
1542  Return type : TRUE on success
1543  Exceptions : none
1544  Caller : general
1545 
1546 =cut
1547 
1548 sub finish_log {
1549  my $self = shift;
1550 
1551  unless($self->param('hideparamlist')) {
1552  $self->log("\nAll done. ".$self->warnings." warnings. ");
1553  if ($self->{'_start_time'}) {
1554  $self->log("Runtime ");
1555  my $diff = time - $self->{'_start_time'};
1556  my $sec = $diff % 60;
1557  $diff = ($diff - $sec) / 60;
1558  my $min = $diff % 60;
1559  my $hours = ($diff - $min) / 60;
1560  $self->log("${hours}h ${min}min ${sec}sec ");
1561  }
1562  $self->log($self->date_and_mem."\n\n");
1563  }
1564  if($self->param('joblog')) {
1565  my $fh;
1566  unless(open($fh,'>',$self->param('joblog'))) {
1567  $self->log_warning("Could not log job to '".$self->param('joblog').
1568  "': $!");
1569  return 1;
1570  }
1571  my @keys;
1572  push @keys,"RUNTIME",(time - $self->{'_start_time'});
1573  my $mem = `ps -p $$ -o vsz |tail -1`;
1574  chomp $mem;
1575  push @keys,"MEMORY",$mem;
1576  push @keys,"WARNINGS",$self->warnings;
1577  push @keys,"START",$self->{'_start_time'};
1578  push @keys,"END",time;
1579  while(@keys) {
1580  my ($k,$v) = splice(@keys,0,2);
1581  print $fh "$k: $v\n";
1582  }
1583  close $fh;
1584  }
1585  return(1);
1586 }
1587 
1588 =head2 date_and_mem
1589 
1590  Example : print LOG "Time, memory usage: ".$support->date_and_mem."\n";
1591  Description : Prints a timestamp and the memory usage of your script.
1592  Return type : String - timestamp and memory usage
1593  Exceptions : none
1594  Caller : general
1595 
1596 =cut
1597 
1598 sub date_and_mem {
1599  my $date = strftime "%Y-%m-%d %T", localtime;
1600  my $mem = `ps -p $$ -o vsz |tail -1`;
1601  chomp $mem;
1602  return "[$date, mem $mem]";
1603 }
1604 
1605 =head2 date
1606 
1607  Example : print "Date: " . $support->date . "\n";
1608  Description : Prints a nicely formatted datetamp (YYYY-DD-MM)
1609  Return type : String - the timestamp
1610  Exceptions : none
1611  Caller : general
1612 
1613 =cut
1614 
1615 sub date {
1616  return strftime "%Y-%m-%d", localtime;
1617 }
1618 
1619 =head2 date_and_time
1620 
1621  Example : print "Date: " . $support->date . "\n";
1622  Description : Prints a nicely formatted timestamp (YYYY-DD-MM hh:mm:ss)
1623  Return type : String - the timestamp
1624  Exceptions : none
1625  Caller : general
1626 
1627 =cut
1628 
1629 sub date_and_time {
1630  return strftime "%Y-%m-%d %T", localtime;
1631 }
1632 
1633 =head2 format_time
1634 
1635  Example : print $support->format_time($gene->modifed_date) . "\n";
1636  Description : Prints timestamps from the database
1637  Return type : String - nicely formatted time stamp
1638  Exceptions : none
1639  Caller : general
1640 
1641 =cut
1642 
1643 
1644 sub date_format {
1645  my( $self, $time, $format ) = @_;
1646  my( $d,$m,$y) = (localtime($time))[3,4,5];
1647  my %S = ('d'=>sprintf('%02d',$d),'m'=>sprintf('%02d',$m+1),'y'=>$y+1900);
1648  (my $res = $format ) =~s/%(\w)/$S{$1}/ge;
1649  return $res;
1650 }
1651 
1652 
1653 =head2 mem
1654 
1655  Example : print "Memory usage: " . $support->mem . "\n";
1656  Description : Prints the memory used by your script. Not sure about platform
1657  dependence of this call ...
1658  Return type : String - memory usage
1659  Exceptions : none
1660  Caller : general
1661 
1662 =cut
1663 
1664 sub mem {
1665  my $mem = `ps -p $$ -o vsz |tail -1`;
1666  chomp $mem;
1667  return $mem;
1668 }
1669 
1670 =head2 commify
1671 
1672  Arg[1] : Int $num - a number to commify
1673  Example : print "An easy to read number: ".$self->commify(100000000);
1674  # will print 100,000,000
1675  Description : put commas into a number to make it easier to read
1676  Return type : a string representing the commified number
1677  Exceptions : none
1678  Caller : general
1679  Status : stable
1680 
1681 =cut
1682 
1683 sub commify {
1684  my $self = shift;
1685  my $num = shift;
1686 
1687  $num = reverse($num);
1688  $num =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
1689 
1690  return scalar reverse $num;
1691 }
1692 
1693 =head2 fetch_non_hidden_slices
1694 
1695  Arg[1] : B::E::SliceAdaptor
1696  Arg[2] : B::E::AttributeAdaptor
1697  Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
1698  Arg[4] : string $coord_system_version (optional) - 'otter' by default
1699  Example : $chroms = $support->fetch_non_hidden_slice($sa,$aa);
1700  Description : retrieve all slices from a loutre database that don't have a hidden attribute.
1701  Doesn't retrieve non-reference slices
1702  Return type : arrayref
1703  Caller : general
1704  Status : stable
1705 
1706 =cut
1707 
1708 sub fetch_non_hidden_slices {
1709  my $self = shift;
1710  my $aa = shift or throw("You must supply an attribute adaptor");
1711  my $sa = shift or throw("You must supply a slice adaptor");
1712  my $cs = shift || 'chromosome';
1713  my $cv = shift || 'Otter';
1714  my $visible_chroms;
1715  foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
1716  my $chrom_name = $chrom->name;
1717  my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
1718  if ( scalar(@$attribs) > 1 ) {
1719  $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n");
1720  }
1721  elsif ($attribs->[0]->value == 0) {
1722  push @$visible_chroms, $chrom;
1723  }
1724  elsif ($attribs->[0]->value == 1) {
1725  $self->log_verbose("chromosome $chrom_name is hidden\n");
1726  }
1727  else {
1728  $self->log_warning("No hidden attribute for chromosome $chrom_name\n");
1729  }
1730  }
1731  return $visible_chroms;
1732 }
1733 
1734 =head2 get_non_hidden_slice_names
1735 
1736  Arg[1] : B::E::SliceAdaptor
1737  Arg[2] : B::E::AttributeAdaptor
1738  Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
1739  Arg[4] : string $coord_system_version (optional) - 'otter' by default
1740  Example : $chrom_names = $support->get_non_hidden_slice_names($sa,$aa);
1741  Description : retrieve names of all slices from a loutre database that don't have a hidden attribute.
1742  Doesn't retrieve non-reference slices
1743  Return type : arrayref of names of all non-hidden slices
1744  Caller : general
1745  Status : stable
1746 
1747 =cut
1748 
1749 sub get_non_hidden_slice_names {
1750  my $self = shift;
1751  my $aa = shift or throw("You must supply an attribute adaptor");
1752  my $sa = shift or throw("You must supply a slice adaptor");
1753  my $cs = shift || 'chromosome';
1754  my $cv = shift || 'Otter';
1755  my $visible_chrom_names;
1756  foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
1757  my $chrom_name = $chrom->seq_region_name;
1758  my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
1759  if ( scalar(@$attribs) > 1 ) {
1760  $self->log_warning("More than one hidden attribute for chromosome $chrom_name\n");
1761  }
1762  elsif ($attribs->[0]->value == 0) {
1763  push @$visible_chrom_names, $chrom_name;
1764  }
1765  elsif ($attribs->[0]->value == 1) {
1766  $self->log_verbose("chromosome $chrom_name is hidden\n");
1767  }
1768  else {
1769  $self->log_warning("No hidden attribute for chromosome $chrom_name\n");
1770  }
1771  }
1772  return $visible_chrom_names;
1773 }
1774 
1775 
1776 =head2 get_wanted_chromosomes
1777 
1778  Arg[1] : B::E::SliceAdaptor
1779  Arg[2] : B::E::AttributeAdaptor
1780  Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
1781  Arg[4] : string $coord_system_version (optional) - 'otter' by default
1782  Example : $chr_names = $support->get_wanted_chromosomes($laa,$lsa);
1783  Description : retrieve names of slices from a lutra database that are ready for dumping to Vega.
1784  Deals with list of names to ignore (ignore_chr = LIST)
1785  Return type : arrayref of slices
1786  Caller : general
1787  Status : stable
1788 
1789 =cut
1790 
1791 sub get_wanted_chromosomes {
1792  my $self = shift;
1793  my $aa = shift or throw("You must supply an attribute adaptor");
1794  my $sa = shift or throw("You must supply a slice adaptor");
1795  my $cs = shift || 'chromosome';
1796  my $cv = shift || 'Otter';
1797  my $export_mode = $self->param('release_type');
1798  my $release = $self->param('vega_release');
1799  my $names;
1800  my $chroms = $self->fetch_non_hidden_slices($aa,$sa,$cs,$cv);
1801  CHROM:
1802  foreach my $chrom (@$chroms) {
1803  my $attribs = $aa->fetch_all_by_Slice($chrom);
1804  my $vals = $self->get_attrib_values($attribs,'vega_export_mod');
1805  if (scalar(@$vals > 1)) {
1806  $self->log_warning ("Multiple attribs for \'vega_export_mod\', please fix before continuing");
1807  exit;
1808  }
1809  next CHROM if (! grep { $_ eq $export_mode} @$vals);
1810  $vals = $self->get_attrib_values($attribs,'vega_release',$release);
1811  if (scalar(@$vals > 1)) {
1812  $self->log_warning ("Multiple attribs for \'vega_release\' value = $release , please fix before continuing");
1813  exit;
1814  }
1815  next CHROM if (! grep { $_ eq $release} @$vals);
1816  my $name = $chrom->seq_region_name;
1817  if (my @ignored = $self->param('ignore_chr')) {
1818  next CHROM if (grep {$_ eq $name} @ignored);
1819  }
1820  push @{$names}, $name;
1821  }
1822  return $names;
1823 }
1824 
1825 =head2 is_haplotype
1826 
1827  Arg[1] : B::E::Slice
1828  Arg[2]: : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!)
1829  Description : Is the slice a Vega haplotype? At the moment this is
1830  implemented by testing for presence of vega_ref_chrom but non_ref
1831  which is correct in practice, but really misses the prupose of
1832  vega_ref_chrom, so this might bite us if that changes.
1833  Return type : boolean
1834 
1835 =cut
1836 
1837 sub is_haplotype {
1838  my ($self,$slice,$dba) = @_;
1839 
1840  $dba ||= $self->dba;
1841  my $aa = $dba->get_adaptor('Attribute');
1842 
1843  my $attribs = $aa->fetch_all_by_Slice($slice);
1844  return (@{$self->get_attrib_values($attribs,'vega_ref_chrom')} and
1845  @{$self->get_attrib_values($attribs,'non_ref',1)});
1846 }
1847 
1848 =head2 get_unique_genes
1849 
1850  Arg[1] : B::E::Slice
1851  Arg[2] : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!)
1852  Example : $genes = $support->get_unique_genes($slice,$dba);
1853  Description : Retrieve genes that are only on the slice itself - used for human where assembly patches
1854  are in the assembly_exception table. Needs the PATCHes to have 'non_ref' seq_region_attributes.
1855  Return type : arrayref of genes
1856  Caller : general
1857  Status : stable
1858 
1859 =cut
1860 
1861 sub get_unique_genes {
1862  my $self = shift;
1863  my ($slice,$dba) = @_;
1864  $slice or throw("You must supply a slice");
1865  $dba ||= $self->dba;
1866 
1867  my $sa = $dba->get_adaptor('Slice');
1868  my $ga = $dba->get_adaptor('Gene');
1869  my $patch = 0;
1870  my $genes = [];
1871  if ( ! $slice->is_reference() and ! $self->is_haplotype($slice,$dba) ) {
1872 # if ( 0 ) {
1873  $patch = 1;
1874  my $slices = $sa->fetch_by_region_unique( $slice->coord_system_name(),$slice->seq_region_name(),undef,undef,undef,$slice->coord_system()->version() );
1875  foreach my $slice ( @{$slices} ) {
1876  push @$genes,@{$ga->fetch_all_by_Slice($slice)};
1877  # my $start = $slice->start;
1878  }
1879  }
1880  else {
1881  $genes = $ga->fetch_all_by_Slice($slice);
1882  }
1883  return ($genes, $patch);
1884 }
1885 
1886 
1887 
1888 =head2 get_attrib_values
1889 
1890  Arg[1] : Arrayref of B::E::Attributes
1891  Arg[2] : 'code' to search for
1892  Arg[3] : 'value' to search for (optional)
1893  Example : my $c = $self->get_attrib_values($attribs,'name'));
1894  Description : (i) In the absence of an attribute value argument, examines an arrayref
1895  of B::E::Attributes for a particular attribute type, returning the values
1896  for each attribute of that type. Can therefore be used to test for the
1897  number of attributes of that type.
1898  (ii) In the presence of the optional value argument it returns all
1899  attributes with that value ie can be used to test for the presence of an
1900  attribute with that particular value.
1901  Return type : arrayref of values for that attribute
1902  Caller : general
1903  Status : stable
1904 
1905 =cut
1906 
1907 sub get_attrib_values {
1908  my $self = shift;
1909  my $attribs = shift;
1910  my $code = shift;
1911  my $value = shift;
1912  if (my @atts = grep {$_->code eq $code } @$attribs) {
1913  my $r = [];
1914  if ($value) {
1915  if (my @values = grep {$_->value eq $value} @atts) {
1916  foreach (@values) {
1917  push @$r, $_->value;
1918  }
1919  return $r;
1920  }
1921  else {
1922  return [];
1923  }
1924  }
1925  else {
1926  foreach (@atts) {
1927  push @$r, $_->value;
1928  }
1929  return $r;
1930  }
1931  }
1932  else {
1933  return [];
1934  }
1935 }
1936 
1937 =head2 fix_attrib_value
1938 
1939  Arg[1] : Arrayref of existing B::E::Attributes
1940  Arg[2] : dbID of object
1941  Arg[3] : name of object (just for reporting)
1942  Arg[4] : attrib_type.code
1943  Arg[5] : attrib_type.value
1944  Arg[6] : interactive ? (0 by default)
1945  Arg[7] : table
1946  Example : $support->fix_attrib_value($attribs,$chr_id,$chr_name,'vega_export_mod','N',1);
1947  Description : adds a new attribute to an object, or updates an existing attribute with a new value
1948  Can be run in interactive or non-interactive mode (default)
1949  Return type : arrayref of results
1950  Caller : general
1951  Status : only ever tested with seq_region_attributes to date
1952 
1953 =cut
1954 
1955 sub fix_attrib_value {
1956  my $self = shift;
1957  my $attribs = shift;
1958  my $id = shift;
1959  my $name = shift;
1960  my $code = shift;
1961  my $value = shift;
1962  my $interact = shift || 0;
1963  my $table = shift || 'seq_region_attrib';
1964 
1965  #transiently set interactive parameter to zero
1966  my $int_before;
1967  if (! $interact) {
1968  $int_before = $self->param('interactive');
1969  $self->param('interactive',0);
1970  }
1971 
1972  #get any existing value(s) for this attribute
1973  my $existings = $self->get_attrib_values($attribs,$code);
1974 
1975  #add a new attribute if there is none...
1976  if (! @$existings ) {
1977  if ($self->user_proceed("Do you want to set $name attrib (code = $code) to value $value ?")) {
1978  my $r = $self->store_new_attribute($id,$code,$value);
1979 
1980  #reset interactive parameter
1981  $self->param('interactive',$int_before) if (! $interact);
1982  return $r;
1983  }
1984  }
1985  #...warn and exit if you're trying to update more than one value for the same attribute...
1986  elsif (scalar @$existings > 1) {
1987  $self->log_warning("You shouldn't be trying to update multiple attributes with the same code at once ($name:$code,$value), looks like you have duplicate entries in the (seq_region_)attrib table\n");
1988  exit;
1989  }
1990 
1991  #...or update an attribute with new values...
1992  else {
1993  my $existing = $existings->[0];
1994  if ($existing ne $value) {
1995  if ($self->user_proceed("Do you want to reset $name attrib (code = $code) from $existing to $value ?")) {
1996  my $r = $self->update_attribute($id,$code,$value);
1997  $self->param('interactive',$int_before) if (! $interact);
1998  push @$r, $existing;
1999  return $r;
2000  }
2001  }
2002  #...or make no change
2003  else {
2004  $self->param('interactive',$int_before) if (! $interact);
2005  return [];
2006  }
2007  }
2008 }
2009 
2010 =head2 _get_attrib_id
2011 
2012  Arg[1] : attrib_type.code
2013  Arg[2] : database handle
2014  Example : $self->_get_attrib_id('name',$dbh)
2015  Description : get attrib_type.attrib_type_id from a attrib_type.code
2016  Return type : attrib_type.attrib_type_id
2017  Caller : internal
2018  Status : stable
2019 
2020 =cut
2021 
2022 sub _get_attrib_id {
2023  my $self = shift;
2024  my $attrib_code = shift;
2025  my $dbh = shift;
2026  my ($attrib_id) = $dbh->selectrow_array(
2027  qq(select attrib_type_id
2028  from attrib_type
2029  where code = ?),
2030  {},
2031  ($attrib_code)
2032  );
2033  if (! $attrib_id) {
2034  $self->log_warning("There is no attrib_type_id for code $attrib_code, please patch the attrib_table\n");
2035  exit;
2036  }
2037  else {
2038  return $attrib_id;
2039  }
2040 }
2041 
2042 =head2 store_new_attribute
2043 
2044  Arg[1] : seq_region.seq_region_id
2045  Arg[2] : attrib_type.code
2046  Arg[3] : attrib_type.value
2047  ARG[4] : table to update (seq_region_attribute by default)
2048  Example : $support->store_new_attribute(23,name,5);
2049  Description : uses MySQL to store an entry (code and value) in an attribute table
2050  (seq_region_attrib by default)
2051  Return type : array_ref
2052  Caller : general
2053  Status : stable
2054 
2055 =cut
2056 
2057 sub store_new_attribute {
2058  my $self = shift;
2059  my $sr_id = shift;
2060  my $attrib_code = shift;
2061  my $attrib_value = shift || '';
2062  my $table = shift || 'seq_region_attrib';
2063 
2064  #get database handle
2065  my $dbh = $self->get_dbconnection('loutre');
2066  #get attrib_type_id for this particular attribute
2067  my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
2068  #store
2069  my $r = $dbh->do(
2070  qq(insert into $table
2071  values (?,?,?)),
2072  {},
2073  ($sr_id,$attrib_id,$attrib_value)
2074  );
2075  return ['Stored',$r];
2076 }
2077 
2078 =head2 update_attribute
2079 
2080  Arg[1] : seq_region.seq_region_id
2081  Arg[2] : attrib_type.code
2082  Arg[3] : attrib_type.value
2083  ARG[4] : table to update (seq_region_attribute by default)
2084  Example : $support->update_attribute(23,name,5);
2085  Description : uses MySQL to update an attribute table (seq_region_attrib by default)
2086  Return type : array_ref
2087  Caller : general
2088  Status : stable
2089 
2090 =cut
2091 
2092 sub update_attribute {
2093  my $self = shift;
2094  my $sr_id = shift;
2095  my $attrib_code = shift;
2096  my $attrib_value = shift;
2097  my $table = shift || 'seq_region_attrib';
2098  my $dbh = $self->get_dbconnection('loutre');
2099  my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
2100  #update
2101  my $r = $dbh->do(
2102  qq(update $table
2103  set value = ?
2104  where seq_region_id = $sr_id
2105  and attrib_type_id = $attrib_id),
2106  {},
2107  ($attrib_value)
2108  );
2109  return ['Updated',$r];
2110 }
2111 
2112 
2113 =head2 remove_duplicate_attribs
2114 
2115  Arg[1] : db handle
2116  Arg[2] : table
2117  Example : $support->remove_duplicate_attribs($dbh,'gene');
2118  Description : uses MySQL to remove duplicate entries from an attribute table
2119  Return type : none
2120  Caller : general
2121  Status : stable
2122 
2123 =cut
2124 
2125 sub remove_duplicate_attribs {
2126  my $self = shift;
2127  my $dbh = shift;
2128  my $table = shift;
2129  $dbh->do(qq(create table nondup_${table}_attrib like ${table}_attrib));
2130  $dbh->do(qq(insert into nondup_${table}_attrib (select ${table}_id, attrib_type_id, value from ${table}_attrib group by ${table}_id, attrib_type_id, value)));
2131  $dbh->do(qq(delete from ${table}_attrib));
2132  $dbh->do(qq(insert into ${table}_attrib (select ${table}_id, attrib_type_id, value from nondup_${table}_attrib)));
2133  $dbh->do(qq(drop table nondup_${table}_attrib));
2134 }
2135 
2136 =head2 sav_seq
2137 
2138  Arg[1] : string (sequence to save)
2139  Example : $support->save_seq('ACGT')
2140  Description : creates a temporary file containing the sequence you give it
2141  Return type : string (filename)
2142  Caller : general
2143  Status : stable
2144 
2145 =cut
2146 
2147 sub save_seq {
2148  my $self = shift;
2149  my $content = shift ;
2150  my $seq_file = $self->param('logpath') . '/SEQ_' . time() . int(rand()*100000000) . $$;
2151  open (my $fh,">$seq_file", $seq_file) or die("Cannot create working file.$!");
2152  print $fh $content;
2153  close $fh;
2154  return ($seq_file);
2155 }
2156 
2157 =head2 get_alignment
2158 
2159  Arg[1] : string (first sequence)
2160  Arg[1] : string (second sequence)
2161  Arg[1] : string (sequence type))
2162  Example : $support->get_alignment('AAAAA','CCCCCCC','DNA')
2163  Description : creates a temporary file containing the sequence you give it
2164  Return type : string (filename)
2165  Caller : general
2166  Status : stable
2167 
2168 =cut
2169 
2170 sub get_alignment {
2171  my $self = shift;
2172  my $ext_seq = shift || return undef;
2173  my $int_seq = shift || return undef;
2174  $int_seq =~ s/<br \/>//g;
2175  my $seq_type = shift || return undef;
2176 
2177 
2178  # To stop box running out of memory - put an upper limit on the size of sequence
2179  # that alignview can handle
2180  if (length $int_seq > 1e6 || length $ext_seq > 1e6) {
2181  $self->log_error('Cannot align if sequence > 1 Mbase');
2182  }
2183 
2184  my $int_seq_file = $self->save_seq($int_seq);
2185  my $ext_seq_file = $self->save_seq($ext_seq);
2186 
2187  my $label_width = '22'; # width of column for e! object label
2188  my $output_width = 61; # width of alignment
2189  my $dnaAlignExe = '/localsw/bin/emboss/bin/matcher -asequence %s -bsequence %s -outfile %s';
2190  my $pepAlignExe = '/localsw/bin/wise2/bin/psw -dymem explicit -m /localsw/bin/wise2/wisecfg/blosum62.bla %s %s -n %s -w %s > %s';
2191 
2192  my $out_file = time() . int(rand()*100000000) . $$;
2193  $out_file = $self->param('logpath').'/' . $out_file . '.out';
2194 
2195  my $command;
2196  my $fh;
2197  if ($seq_type eq 'DNA') {
2198  $command = sprintf $dnaAlignExe, $int_seq_file, $ext_seq_file, $out_file;
2199  `$command`;
2200  unless (open($fh, "<", $out_file)) {
2201  $command = sprintf $dnaAlignExe, $int_seq_file, $ext_seq_file, $out_file;
2202  `$command`;
2203  }
2204  }
2205  elsif ($seq_type eq 'PEP') {
2206  $command = sprintf $pepAlignExe, $int_seq_file, $ext_seq_file, $label_width, $output_width, $out_file;
2207  `$command`;
2208  unless (open($fh, "<", $out_file)) {
2209  $self->log_warning("Cannot open alignment file\n");
2210  }
2211  }
2212  my $alignment ;
2213  while (<$fh>) {
2214  next if $_ =~
2215  /\#Report_file
2216  |\#----.*
2217  |\/\/\s*
2218  |\#\#\#
2219  |^\#$
2220  |Rundate: #matcher
2221  |Commandline #matcher
2222  |asequence #matcher
2223  |bsequence #matcher
2224  |outfile #matcher
2225  |aformat #matcher
2226  |Align_format #matcher
2227  |Report_file #matcher
2228  /x;
2229  $alignment .= $_;
2230  }
2231 
2232  $alignment =~ s/\n+$//;
2233  unlink $out_file;
2234  unlink $int_seq_file;
2235  unlink $ext_seq_file;
2236  return $alignment;
2237 }
2238 
2239 sub allowed_duplicate_regions {
2240  my $self = shift;
2241 
2242  # set up lists of vega names of wanted haplotype / strains
2243  my $regions = {
2244  'human' => [
2245  {
2246  '6' => '28000000:34000000',
2247  '6-COX' => 'all',
2248  '6-QBL' => 'all',
2249  '6-APD' => 'all',
2250  '6-MANN' => 'all',
2251  '6-MCF' => 'all',
2252  '6-DBB' => 'all',
2253  '6-SSTO' => 'all',
2254  },
2255  {
2256  '19' => '54020000:54910000',
2257  '19-PGF_1' => 'all',
2258  '19-PGF_2' => 'all',
2259  '19-COX_1' => 'all',
2260  '19-COX_2' => 'all',
2261  '19-DM1A' => 'all',
2262  '19-DM1B' => 'all',
2263  '19-MC1A' => 'all',
2264  '19-MC1B' => 'all',
2265  },
2266  ],
2267  'mouse' => [
2268  {
2269  '1' => '60564732:63711641',
2270  '1-Idd5.1_DIL' => 'all',
2271  '1-Idd5.1_CHO' => 'all',
2272  },
2273  {
2274  '1' => '65533102:69307244',
2275  '1-Idd5.3_DIL' => 'all',
2276  },
2277  {
2278  '1' => '130232728:130661594',
2279  '1-Idd5.4_DIL' => 'all',
2280  },
2281  {
2282  '3' => '36492618:37600833',
2283  '3-Idd3_DIL' => 'all',
2284  '3-Idd3_129' => 'all',
2285  },
2286  {
2287  '3' => '99848826:101467080',
2288  '3-Idd10_DIL' => 'all',
2289  },
2290  {
2291  '3' => '109144756:109930492',
2292  '3-Idd18.1_DIL' => 'all',
2293  },
2294  {
2295  '3' => '103489414:104054885',
2296  '3-Idd18.2_DIL' => 'all',
2297  },
2298  {
2299  '4' => '128371876:131853368',
2300  '4-Idd9.1_DIL' => 'all',
2301  },
2302  {
2303  '4' => '134841437:135252443',
2304  '4-Idd9.1M_DIL' => 'all',
2305  },
2306  {
2307  '4' => '146049976:149895141',
2308  '4-Idd9.2_DIL' => 'all',
2309  },
2310  {
2311  '4' => '149556939:151385803',
2312  '4-Idd9.3_DIL' => 'all',
2313  },
2314  {
2315  '6' => '143550839:149565172',
2316  '6-Idd6.1_2_CHO' => 'all',
2317  },
2318  {
2319  '6' => '129593784:131241919',
2320  '6-Idd6.AM_CHO' => 'all',
2321  },
2322  {
2323  '11' => '69890356:71340035',
2324  '11-Idd4.1_DIL' => 'all'
2325  },
2326  {
2327  '11' => '72734492:74404570',
2328  '11-Idd4.2_DIL' => 'all',
2329  },
2330  {
2331  '11' => '86785996:90007691',
2332  '11-Idd4.2Q_CHO' => 'all',
2333  },
2334  {
2335  '17' => '27302611:29220265',
2336  '17-Idd16.1_CHO' => 'all',
2337  },
2338  {
2339  '17' => '33567721:38721962',
2340  '17-Idd1_CHO' => 'all',
2341  '17-Idd1_DIL' => 'all',
2342  },
2343  ],
2344  'pig' => [
2345  {
2346  '7' => '24728583:29807435',
2347  '7-LW' => 'all',
2348  },
2349  {
2350  'X' => 'all',
2351  'X-WTSI' => 'all',
2352  },
2353  ],
2354  'zebrafish' => [],
2355  'gorilla' => [],
2356  'wallaby' => [],
2357  'chimp' => [],
2358  'rat' => [],
2359  };
2360 
2361 return $regions;
2362 
2363 }
2364 
2365 1;
confirm
public confirm()
usage
public usage()
Bio::EnsEMBL::Utils::ConversionSupport
Definition: ConversionSupport.pm:39
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
map
public map()
Script
Definition: dump_mysql.pl:9
run
public run()
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68