3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
34 schema conversion scripts
38 my $serverroot =
'/path/to/ensembl';
41 # parse common options
42 $support->parse_common_options;
44 # parse extra options for your script
45 $support->parse_extra_options(
'string_opt=s',
'numeric_opt=n' );
47 # ask user if he wants to run script with these parameters
48 $support->confirm_params;
50 # see individual method documentation for more stuff
54 This module is a collection of common methods and provides helper
55 functions
for the Vega release and schema conversion scripts. Amongst
56 others, it reads options from a config file, parses commandline options
63 package Bio::EnsEMBL::Utils::ConversionSupport;
67 no warnings
'uninitialized';
72 use FindBin qw($Bin $Script);
73 use POSIX qw(strftime);
77 use Fcntl qw(:flock SEEK_END);
79 my $species_c = 1; #counter to be used
for each database connection made
83 Arg[1] : String $serverroot - root directory of your ensembl sandbox
86 Description : constructor
88 Exceptions : thrown
if no serverroot is provided
95 (my $serverroot = shift) or
throw(
"You must supply a serverroot.");
97 '_serverroot' => $serverroot,
98 '_param' => { interactive => 1 },
101 bless ($self, $class);
105 =head2 parse_common_options
107 Example : $support->parse_common_options;
108 Description : This method reads options from a configuration file and parses
109 some commandline options that are common to all scripts (like
110 db connection settings, help, dry-
run). Commandline options
111 will
override config file settings.
113 All options will be accessible via $self->param(
'name').
114 Return type :
true on success
115 Exceptions : thrown
if configuration file can
't be opened
120 sub parse_common_options {
123 # read commandline options
125 Getopt::Long::Configure("pass_through");
128 'host|dbhost|db_host=s
',
129 'port|dbport|db_port=n
',
130 'user|dbuser|db_user=s
',
131 'pass|dbpass|db_pass=s
',
142 'logappend|log_append
',
152 my $conffile = $h{'conffile
'} || $self->serverroot . "/sanger-plugins/vega/conf/ini-files/Conversion.ini";
153 $conffile = abs_path($conffile);
155 open(my $fh, '<
', $conffile) or throw(
156 "Unable to open configuration file $conffile for reading: $!");
157 my $serverroot = $self->serverroot;
165 # read options into internal parameter datastructure, removing whitespace
166 next unless (/(\w\S*)\s*=\s*(\S*)\s*/);
169 if ($val =~ /\$SERVERROOT/) {
170 $val =~ s/\$SERVERROOT/$serverroot/g;
171 $val = abs_path($val);
173 $self->param($name, $val);
176 $self->param('conffile
', $conffile);
179 warning("Unable to open configuration file $conffile for reading: $!");
182 # override configured parameter with commandline options
183 map { $self->param($_, $h{$_}) } keys %h;
185 return (1) if $self->param('nolog
');
187 # if logpath & logfile are not set, set them here to /ensemblweb/vega_dev/shared/logs/conversion/DBNAME/SCRIPNAME_NN.log
188 if (! defined($self->param('log_base_path
'))) {
189 $self->param('log_base_path
','/ensemblweb/shared/logs/conversion/
');
191 my $dbname = $self->param('dbname
');
192 $dbname =~ s/^vega_//;
193 if (not (defined($self->param('logpath
')) )){
194 $self->param('logpath
', $self->param('log_base_path
')."/".$dbname."/" );
196 if ( not defined $self->param('logfile
') ){
200 for ($counter=1 ; (-e $self->param('logpath
')."/".$log."_".sprintf("%03d", $counter).".log"); $counter++){
201 # warn $self->param('logpath
')."/".$log."_".$counter.".log";
203 $self->param('logfile
', $log."_".sprintf("%03d", $counter).".log");
208 =head2 parse_extra_options
210 Arg[1-N] : option descriptors that will be passed on to Getopt::Long
211 Example : $support->parse_extra_options('string_opt=s
', 'numeric_opt=n
');
212 Description : Parse extra commandline options by passing them on to
213 Getopt::Long and storing parameters in $self->param('name).
214 If the last option is
'...' then unknown options are kept
215 in @ARGV
for future calls to
this method. Without
'...'
216 then it is an error to have further options.
217 Return type :
true on success
218 Exceptions : none (caugth by $self->error)
223 sub parse_extra_options {
224 my ($self, @params) = @_;
225 if(@params and $params[-1] eq
"...") {
227 Getopt::Long::Configure(
"pass_through");
229 Getopt::Long::Configure(
"no_pass_through");
232 # catch warnings to pass to $self->error
233 local $SIG{__WARN__} = sub { die @_; };
234 &GetOptions(\%{ $self->{
'_param'} }, @params);
236 $self->error($@)
if $@;
240 =head2 allowed_params
242 Arg[1-N] : (optional) List of allowed parameters to set
243 Example : my @allowed = $self->allowed_params(qw(param1 param2));
244 Description : Getter/setter
for allowed parameters. This is used by
245 $self->confirm_params() to avoid cluttering of output with
246 conffile entries not relevant
for a given script. You can use
247 $self->get_common_params() as a shortcut to set them.
248 Return type : Array - list of allowed parameters
259 @{ $self->{
'_allowed_params'} } = @_;
263 if (ref($self->{
'_allowed_params'}) eq
'ARRAY') {
264 return @{ $self->{
'_allowed_params'} };
270 =head2 get_common_params
272 Example : my @allowed_params = $self->get_common_params,
'extra_param';
273 Description : Returns a list of commonly used parameters in the conversion
274 scripts. Shortcut
for setting allowed parameters with
275 $self->allowed_params().
276 Return type : Array - list of common parameters
282 sub get_common_params {
303 =head2 get_loutre_params
305 Arg : (optional)
return a list to parse or not
306 Example : $support->parse_extra_options($support->get_loutre_params(
'parse'))
307 Description : Returns a list of commonly used loutre db parameters - parse option is
308 simply used to distinguish between reporting and parsing parameters
309 Return type : Array - list of common parameters
315 sub get_loutre_params {
337 =head2 get_annotrack_params
339 Arg : (optional)
return a list to parse or not
340 Example : $support->parse_extra_options($support->get_annotrack_params(
'parse'))
341 Description : Returns a list of parameters used to connect to annotrack
342 Return type : Array - list of common parameters
348 sub get_annotrack_params {
372 =head2 remove_vega_params
374 Example : $support->remove_vega_params;
375 Description : Removes Vega db conection parameters. Usefull to avoid clutter in log files when
376 working exclusively with loutre
383 sub remove_vega_params {
385 foreach my $param (qw(dbname host port user pass)) {
386 $self->{
'_param'}{$param} = undef;
390 =head2 confirm_params
392 Example : $support->confirm_params;
393 Description : Prints a table of parameters that were collected from config
394 file and commandline and asks user to
confirm if he wants
396 Return type :
true on success
405 # print parameter table
406 return 1
if($self->param(
'hideparamlist') and not $self->param(
'interactive'));
407 print
"Running script with these parameters:\n\n";
408 print $self->list_all_params;
410 if ($self->param(
'host') eq
'ensweb-1-10') {
411 # ask user if he wants to proceed
412 exit unless $self->user_proceed(
"**************\n\n You're working on ensdb-1-10! Is that correct and you want to continue ?\n\n**************");
415 # ask user if he wants to proceed
416 exit unless $self->user_proceed(
"Continue?");
421 =head2 list_all_params
423 Example : print LOG $support->list_all_params;
424 Description : prints a table of the parameters used in the script
425 Return type : String - the table to print
431 sub list_all_params {
433 my $txt = sprintf
" %-25s%-110s\n", qw(PARAMETER VALUE);
434 $txt .=
" " .
"-"x136 .
"\n";
435 $Text::Wrap::columns = 130;
436 my @params = $self->allowed_params;
437 foreach my $key (@params) {
438 my @vals = $self->param($key);
440 $txt .= Text::Wrap::wrap( sprintf(
' %-25s', $key),
450 =head2 create_commandline_options
452 Arg[1] : Hashref $settings - hashref describing what to
do
454 allowed_params => 0|1 # use all allowed parameters
455 exclude => [] # listref of parameters to exclude
456 replace => {param => newval} # replace value of param with
458 Example : $support->create_commandline_options({
460 exclude => [
'verbose'],
461 replace => {
'dbname' =>
'homo_sapiens_vega_33_35e' }
463 Description : Creates a commandline options
string that can be passed to any
464 other script
using ConversionSupport.
465 Return type : String - commandline options
string
471 sub create_commandline_options {
472 my ($self, $settings, $param_hash) = @_;
473 my %param_hash = $param_hash ? %$param_hash : ();
475 # get all allowed parameters
476 if ($settings->{
'allowed_params'}) {
477 # exclude params explicitly stated
478 my %exclude =
map { $_ => 1 } @{ $settings->{
'exclude'} || [] };
479 foreach my $param ($self->allowed_params) {
480 unless ($exclude{$param}) {
481 my ($first, @rest) = $self->param($param);
482 next unless (defined($first));
485 $first = join(
",", $first, @rest);
487 $param_hash{$param} = $first;
493 foreach my $key (keys %{ $settings->{
'replace'} || {} }) {
494 $param_hash{$key} = $settings->{
'replace'}->{$key};
497 # create the commandline options string
499 foreach my $param (keys %param_hash) {
500 $options_string .= sprintf(
"--%s %s ", $param, $param_hash{$param});
502 return $options_string;
505 =head2 check_required_params
507 Arg[1-N] : List @params - parameters to check
508 Example : $self->check_required_params(qw(dbname host port));
509 Description : Checks $self->param to make sure the requested parameters
510 have been set. Dies
if parameters are missing.
511 Return type :
true on success
517 sub check_required_params {
518 my ($self, @params) = @_;
520 foreach my $param (@params) {
521 push @missing, $param unless defined $self->param($param);
524 throw(
"Missing parameters: @missing.\nYou must specify them on the commandline or in your conffile.\n");
531 Arg[1] : (optional) String $text - notification text to present to user
532 Example : #
run a code snipped conditionally
533 if ($support->user_proceed(
"Run the next code snipped?")) {
537 # exit if requested by user
538 exit unless ($support->user_proceed(
"Want to continue?"));
539 Description : If running interactively, the user is asked
if he wants to
540 perform a script action. If he doesn
't, this section is skipped
541 and the script proceeds with the code. When running
542 non-interactively, the section is run by default.
543 Return type : TRUE to proceed, FALSE to skip.
550 my ($self, $text) = @_;
552 if ($self->param('interactive
')) {
553 print "$text\n" if $text;
557 unless ($input eq 'y
') {
567 =head2 read_user_input
569 Arg[1] : (optional) String $text - notification text to present to user
570 Example : my $ret = $support->read_user_input("Choose a number [1/2/3]");
573 } elsif ($ret == 2) {
576 Description : If running interactively, the user is asked for input.
577 Return type : String - user's input
583 sub read_user_input {
584 my ($self, $text) = @_;
586 if ($self->param(
'interactive')) {
587 print
"$text\n" if $text;
596 Arg[1-N] : list of parameter names to parse
597 Example : $support->comma_to_list(
'chromosomes');
598 Description : Transparently converts comma-separated lists into arrays (to
599 allow different styles of commandline options, see perldoc
600 Getopt::Long
for details). Parameters are converted in place
601 (accessible through $self->param(
'name')).
602 Return type :
true on success
610 foreach my $param (@_) {
612 split (/,/, join (
',', $self->param($param))));
619 Arg[1] : Name of parameter to parse
620 Example : $support->list_or_file(
'gene');
621 Description : Determines whether a parameter holds a list or it is a filename
622 to read the list entries from.
623 Return type :
true on success
624 Exceptions : thrown
if list file can
't be opened
630 my ($self, $param) = @_;
631 my @vals = $self->param($param);
632 return unless (@vals);
634 my $firstval = $vals[0];
635 if (scalar(@vals) == 1 && -e $firstval) {
636 # we didn't get a list of values, but a file to read values from
638 open(my $fh,
'<', $firstval) or
throw(
"Cannot open $firstval for reading: $!");
644 $self->param($param, @vals);
646 $self->comma_to_list($param);
652 Arg[1] : Parameter name
653 Arg[2-N] : (optional) List of values to set
654 Example : my $dbname = $support->param(
'dbname');
655 $support->param(
'port', 3306);
656 $support->param(
'chromosomes', 1, 6,
'X');
657 Description : Getter/setter
for parameters. Accepts single-value params and
659 Return type : Scalar value
for single-value parameters, array of values
for
661 Exceptions : thrown
if no parameter name is supplied
668 my $name = shift or
throw(
"You must supply a parameter name");
672 if (scalar(@_) == 1) {
674 $self->{
'_param'}->{$name} = shift;
677 undef $self->{
'_param'}->{$name};
678 @{ $self->{
'_param'}->{$name} } = @_;
683 if (ref($self->{
'_param'}->{$name}) eq
'ARRAY') {
685 return @{ $self->{
'_param'}->{$name} };
686 } elsif (defined($self->{
'_param'}->{$name})) {
687 # single-value parameter
688 return $self->{
'_param'}->{$name};
696 Arg[1] : (optional) String - error message
697 Example : $support->error(
"An error occurred: $@");
698 exit(0) if $support->error;
699 Description : Getter/setter for error messages
700 Return type : String - error message
708 $self->{
'_error'} = shift
if (@_);
709 return $self->{
'_error'};
714 Example : print LOG
"There were ".$support->warnings.
" warnings.\n";
715 Description : Returns the number of warnings encountered
while running the
716 script (the warning counter is increased by $self->log_warning).
717 Return type : Int - number of warnings
725 return $self->{
'_warnings'};
730 Arg[1] : (optional) String - root directory of your ensembl sandbox
731 Example : my $serverroot = $support->serverroot;
732 Description : Getter/setter
for the root directory of your ensembl sandbox.
733 This is set when ConversionSupport
object is created, so
734 usually only used as a getter.
735 Return type : String - the server root directory
743 $self->{
'_serverroot'} = shift
if (@_);
744 return $self->{
'_serverroot'};
749 Arg[1] : String $database - the type of database to connect to
751 Arg[2] : (optional) String $prefix - the prefix used
for retrieving the
752 connection settings from the configuration
753 Example : my $db = $support->get_database(
'core');
754 Description : Connects to the database specified.
755 Return type : DBAdaptor of the appropriate type
756 Exceptions : thrown
if asking
for unknown database
763 my $database = shift or
throw(
"You must provide a database");
764 my $prefix = shift ||
'';
765 $self->check_required_params(
772 core =>
'Bio::EnsEMBL::DBSQL::DBAdaptor',
773 ensembl =>
'Bio::EnsEMBL::DBSQL::DBAdaptor',
774 evega =>
'Bio::EnsEMBL::DBSQL::DBAdaptor',
775 otter =>
'Bio::Otter::DBSQL::DBAdaptor',
776 vega =>
'Bio::Otter::DBSQL::DBAdaptor',
777 compara =>
'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor',
778 loutre =>
'Bio::Vega::DBSQL::DBAdaptor',
780 throw(
"Unknown database: $database") unless $adaptors{$database};
782 $self->dynamic_use($adaptors{$database});
783 my $species =
'species' . $species_c;
784 my $dba = $adaptors{$database}->new(
785 -host => $self->param(
"${prefix}host"),
786 -port => $self->param(
"${prefix}port"),
787 -user => $self->param(
"${prefix}user"),
788 -pass => $self->param(
"${prefix}pass") ||
'',
789 -dbname => $self->param(
"${prefix}dbname"),
791 -species => $species,
793 #can use this approach to get dna from another db
794 # my $dna_db = $adaptors{$database}->new(
795 # -host => 'otterlive',
797 # -user => $self->param("${prefix}user"),
798 # -pass => $self->param("${prefix}pass"),
799 # -dbname => 'loutre_human',
801 # $dba->dnadb($dna_db);
803 # otherwise explicitely set the dnadb to itself - by default the Registry assumes
804 # a group 'core' for this now
809 $self->{
'_dba'}->{$database} = $dba;
810 $self->{
'_dba'}->{
'default'} = $dba unless $self->{
'_dba'}->{
'default'};
811 return $self->{
'_dba'}->{$database};
815 =head2 get_dbconnection
817 Arg[1] : (optional) String $prefix - the prefix used
for retrieving the
818 connection settings from the configuration
819 Example : my $dbh = $self->get_dbconnection;
820 Description : Connects to the database server specified. You don
't have to
821 specify a database name (this is useful for running commands
822 like $dbh->do('show databases
')).
823 Return type : DBI database handle
824 Exceptions : thrown if connection fails
830 sub get_dbconnection {
834 $self->check_required_params(
840 my $dsn = "DBI:" . ($self->param('driver
')||'mysql
') .
841 ":host=" . $self->param("${prefix}host") .
842 ";port=" . $self->param("${prefix}port");
844 if ($self->param("${prefix}dbname")) {
845 $dsn .= ";dbname=".$self->param("${prefix}dbname");
851 my $user = $self->param("${prefix}user");
852 my $pass = $self->param("${prefix}pass");
854 $dbh = DBI->connect($dsn, $user, $pass, {'RaiseError
' => 1, 'PrintError
' => 0});
857 if (!$dbh || $@ || !$dbh->ping) {
858 $self->log_error("Could not connect to db server as user ".
859 $self->param("${prefix}user") .
860 " using [$dsn] as a locator:\n" . $DBI::errstr . $@);
863 $self->{'_dbh
'} = $dbh;
864 return $self->{'_dbh
'};
871 Arg[1] : (optional) String $database - type of db apaptor to retrieve
872 Example : my $dba = $support->dba;
873 Description : Getter for database adaptor. Returns default (i.e. created
874 first) db adaptor if no argument is provided.
875 Return type : Bio::EnsEMBL::DBSQL::DBAdaptor or Bio::Otter::DBSQL::DBAdaptor
882 my ($self, $database) = @_;
883 return $self->{'_dba
'}->{$database} || $self->{'_dba
'}->{'default'};
888 Arg [1] : String $classname - The name of the class to require/import
890 Description: Requires and imports the methods for the classname provided,
891 checks the symbol table so that it doesnot re-require modules
892 that have already been required.
893 Returntype : true on success
894 Exceptions : Warns to standard error if module fails to compile
900 my ($self, $classname) = @_;
901 my ($parent_namespace, $module) = $classname =~/^(.*::)(.*)$/ ? ($1,$2) : ('::
', $classname);
903 no strict 'refs
'; ## no critic
904 # return if module has already been imported
905 return 1 if $parent_namespace->{$module.'::
'} && %{ $parent_namespace->{$module.'::
'}||{} };
907 eval "require $classname"; ## no critic
908 throw("Failed to require $classname: $@") if ($@);
909 $classname->import();
916 Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
917 Arg[2] : (optional) String $version - coord_system version
918 Arg[3] : (optional) String $type - type of region eg chromsome (defaults to 'toplevel
')
919 Arg[4] : (optional) Boolean - return non reference slies as well (required for haplotypes eq 6-COX)
920 Arg[5] : (optional) Override chromosome parameter filtering with this array reference. Empty denotes all.
921 Example : my $chr_length = $support->get_chrlength($dba);
922 Description : Get all chromosomes and their length from the database. Return
923 chr_name/length for the chromosomes the user requested (or all
924 chromosomes by default)
925 Return type : Hashref - chromosome_name => length
926 Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
932 my ($self, $dba, $version,$type,$include_non_reference,$chroms) = @_;
934 $type ||= 'toplevel
';
936 throw("get_chrlength should be passed a Bio::EnsEMBL::DBSQL::DBAdaptor\n")
939 my $sa = $dba->get_SliceAdaptor;
941 my @chromosomes = map { $_->seq_region_name }
942 @{ $sa->fetch_all($type,$version,$include_non_reference) };
943 my %chr = map { $_ => $sa->fetch_by_region($type, $_, undef, undef, undef, $version)->length } @chromosomes;
945 my @wanted = $self->param('chromosomes
');
946 @wanted = @$chroms if defined $chroms and ref($chroms) eq 'ARRAY
';
949 # check if user supplied invalid chromosome names
950 foreach my $chr (@wanted) {
952 foreach my $chr_from_db (keys %chr) {
953 if ($chr_from_db eq $chr) {
959 #if you get this when using ensembl-vega then the dbname will be misleading.
960 warning("Didn't find chromosome $chr in database
" .
961 $self->param('dbname'));
965 # filter to requested chromosomes only
967 foreach my $chr_from_db (keys %chr) {
968 foreach my $chr (@wanted) {
969 if ($chr_from_db eq $chr) {
973 delete($chr{$chr_from_db});
980 =head2 get_ensembl_chr_mapping
982 Arg[1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $dba
983 Arg[2] : (optional) String $version - coord_system version
984 Example : my $ensembl_mapping = $support->get_ensembl_chr_mapping($dba);
985 Description : Gets a mapping between Vega chromosome names and their
986 equivalent Ensembl chromosomes. Works with non-reference chromosomes
987 Return type : Hashref - Vega name => Ensembl name
988 Exceptions : thrown if not passing a Bio::EnsEMBL::DBSQL::DBAdaptor
993 sub get_ensembl_chr_mapping {
994 my ($self, $dba, $version) = @_;
998 my $sa = $dba->get_SliceAdaptor;
999 my @chromosomes = map { $_->seq_region_name }
1000 @{ $sa->fetch_all('chromosome', $version, 1) };
1003 foreach my $chr (@chromosomes) {
1004 my $sr = $sa->fetch_by_region('chromosome', $chr, undef, undef, undef, $version);
1005 my ($ensembl_name_attr) = @{ $sr->get_all_Attributes('ensembl_name') };
1006 if ($ensembl_name_attr) {
1007 $chrs{$chr} = $ensembl_name_attr->value;
1015 =head2 get_taxonomy_id
1017 Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
1018 Example : my $sid = $support->get_taxonony_id($dba);
1019 Description : Retrieves the taxononmy ID from the meta table
1020 Return type : Int - the taxonomy ID
1021 Exceptions : thrown if no taxonomy ID is found in the database
1026 sub get_taxonomy_id {
1027 my ($self, $dba) = @_;
1028 $dba ||= $self->dba;
1029 my $sql = 'SELECT meta_value FROM meta WHERE meta_key = "species.taxonomy_id
"';
1030 my $sth = $dba->dbc->db_handle->prepare($sql);
1032 my ($tid) = $sth->fetchrow_array;
1034 $self->throw("Could not determine taxonomy_id from database.
") unless $tid;
1038 =head2 get_species_scientific_name
1040 Arg[1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba
1041 Example : my $species = $support->get_species_scientific_name($dba);
1042 Description : Retrieves the species scientific name (Genus species) from the
1044 Return type : String - species scientific name
1045 Exceptions : thrown if species name can not be determined from db
1050 sub get_species_scientific_name {
1051 my ($self, $dba) = @_;
1052 $dba ||= $self->dba;
1053 my $sql = "SELECT meta_value FROM meta WHERE meta_key = \
'species.scientific_name\'";
1054 my $sth = $dba->dbc->db_handle->prepare($sql);
1057 while (my @row = $sth->fetchrow_array) {
1060 if (! @sp || @sp > 1) {
1061 $self->throw("Could not retrieve a single species scientific name from database.");
1064 my $species = $sp[0];
1065 $species =~ s/ /_/g;
1071 Arg[1] : (optional) String $species - species name to set
1072 Example : my $species = $support->species;
1073 my $url = "http://vega.sanger.ac.uk/$species/";
1074 Description : Getter/setter for species name (Genus_species). If not set, it's
1075 determined from database
's meta table
1076 Return type : String - species name
1084 $self->{'_species
'} = shift if (@_);
1085 # get species name from database if not set
1086 unless ($self->{'_species
'}) {
1087 $self->{'_species
'} = $self->get_species_scientific_name;
1089 return $self->{'_species
'};
1092 =head2 sort_chromosomes
1094 Arg[1] : (optional) Hashref $chr_hashref - Hashref with chr_name as keys
1095 Example : my $chr = { '6-COX
' => 1, '1
' => 1, 'X
' => 1 };
1096 my @sorted = $support->sort_chromosomes($chr);
1097 Description : Sorts chromosomes in an intuitive way (numerically, then
1098 alphabetically). If no chromosome hashref is passed, it's
1099 retrieve by calling $self->get_chrlength()
1100 Return type : List - sorted chromosome names
1101 Exceptions : thrown
if no hashref is provided
1106 sub sort_chromosomes {
1107 my ($self, $chr_hashref, $version, $include_non_reference) = @_;
1108 $chr_hashref = $self->get_chrlength($self->dba, $version,
'chromosome', $include_non_reference) unless ($chr_hashref);
1109 throw(
"You have to pass a hashref of your chromosomes")
1110 unless ($chr_hashref and ref($chr_hashref) eq
'HASH');
1111 return (sort _by_chr_num keys %$chr_hashref);
1116 Example : my @sorted = sort _by_chr_num qw(X, 6-COX, 14, 7);
1117 Description : Subroutine to use in sort
for sorting chromosomes. Sorts
1118 numerically, then alphabetically
1119 Return type : values to be used by sort
1121 Caller :
internal ($self->sort_chromosomes)
1126 my @awords = split /-/, $a;
1127 my @bwords = split /-/, $b;
1129 my $anum = $awords[0];
1130 my $bnum = $bwords[0];
1132 if ($anum !~ /^[0-9]*$/) {
1133 if ($bnum !~ /^[0-9]*$/) {
1134 return $anum cmp $bnum;
1139 if ($bnum !~ /^[0-9]*$/) {
1143 if ($anum <=> $bnum) {
1144 return $anum <=> $bnum;
1146 if ($#awords == 0) {
1148 } elsif ($#bwords == 0) {
1151 return $awords[1] cmp $bwords[1];
1156 =head2 split_chromosomes_by_size
1158 Arg[1] : (optional) Int $cutoff - the cutoff in bp between small and
1160 Arg[2] : (optional) Boolean to include duplicate regions, ie PAR or not
1162 Arg[3] : (optional) Coordsystem version to retrieve
1164 Example : my $chr_slices = $support->split_chromosomes_by_size;
1165 foreach my $block_size (keys %{ $chr_slices }) {
1166 print
"Chromosomes with blocksize $block_size: ";
1167 print join(
", ",
map { $_->seq_region_name }
1168 @{ $chr_slices->{$block_size} });
1170 Description : Determines block sizes
for storing DensityFeatures on
1171 chromosomes, and
return slices
for each chromosome. The block
1172 size is determined so that you have 150 bins
for the smallest
1173 chromosome over 5 Mb in length. For chromosomes smaller than 5
1174 Mb, an additional smaller block size is used to yield 150 bins
1175 for the overall smallest chromosome. This will result in
1176 reasonable resolution
for small chromosomes and high
1177 performance
for big ones. Does not
return non-reference seq_regions
1178 Return type : Hashref (key: block size; value: Arrayref of chromosome
1179 Bio::EnsEMBL::Slices)
1181 Caller : density scripts
1185 sub split_chromosomes_by_size {
1187 my $cutoff = shift || 5000000;
1188 my $dup = shift || 0;
1189 my $cs_version = shift;
1190 my $include_non_reference = 1; #get non reference slices
1191 my $slice_adaptor = $self->dba->get_SliceAdaptor;
1192 my ($top_slices, $wanted_slices, $missed_slices);
1193 if ($self->param(
'chromosomes')) {
1194 foreach my $chr ($self->param(
'chromosomes')) {
1195 push @{ $top_slices }, $slice_adaptor->fetch_by_region(
'chromosome', $chr);
1199 $top_slices = $slice_adaptor->fetch_all(
'chromosome',$cs_version,$include_non_reference,$dup);
1202 # filter out patches, if present
1203 $wanted_slices = [ grep { $_->is_reference or $self->is_haplotype($_,$self->dba) } @$top_slices ];
1205 # make a note of which ones are excluded
1206 $missed_slices = [ grep { ! $_->is_reference and ! $self->is_haplotype($_,$self->dba) } @$top_slices ];
1208 #warn non PATCH toplevels slice that are excluded (could be haplotypes if an earlier stage has failed)
1209 foreach (@{$missed_slices || []}) {
1210 my $sr_name = $_->seq_region_name;
1211 if ($self->is_patch($_)) {
1212 $self->log(
"Excluding $sr_name\n");
1215 $self->log_warning(
"Excluding $sr_name, are you sure ?\n");
1219 my ($big_chr, $small_chr, $min_big_chr, $min_small_chr);
1220 foreach my $slice (@{ $wanted_slices }) {
1221 next
if ($slice->length eq 10000); #hack
for chrY pseudoslice
1222 if ($slice->length < $cutoff) {
1223 if (! $min_small_chr or ($min_small_chr > $slice->length)) {
1224 $min_small_chr = $slice->length;
1226 # push small chromosomes onto $small_chr
1227 push @{ $small_chr }, $slice;
1229 elsif (! $min_big_chr or ($min_big_chr > $slice->length) ){
1230 $min_big_chr = $slice->length;
1232 # push _all_ chromosomes onto $big_chr
1233 push @{ $big_chr }, $slice;
1236 $chr_slices->{int($min_big_chr/150)} = $big_chr
if $min_big_chr;
1237 $chr_slices->{int($min_small_chr/150)} = $small_chr
if $min_small_chr;
1243 Arg[1] : B::E::Slice
1244 Example :
if ($support->is_patch($slice)) { ...
1245 Description : Looks at seq_region attributes to decide
if a slice is a patch or not
1246 If PATCH seq_region_attrib is not there check to see
if name suggests it is a PATCH
1247 Return type :
true/
false
1252 my ($self,$slice) = @_;
1253 my @patch_attrib_types = qw(patch_fix patch_novel); #seq_region_attribs used to define a patch
1254 foreach my $attrib_type (@patch_attrib_types) {
1255 if (@{$slice->get_all_Attributes($attrib_type)}) {
1265 Arg[1] : String $txt - the text to log
1266 Arg[2] : Int $indent - indentation level
for log message
1267 Example : my $log = $support->log_filehandle;
1268 $support->log(
'Log foo.\n', 1);
1269 Description : Logs a message to the filehandle initialised by calling
1270 $self->log_filehandle(). You can supply an indentation level
1271 to get nice hierarchical log messages.
1272 Return type :
true on success
1273 Exceptions : thrown when no filehandle can be obtained
1279 my ($self, $txt, $indent) = @_;
1282 # strip off leading linebreaks so that indenting doesn't break
1285 $txt = $1.
" "x$indent . $txt;
1286 my $fh = $self->{
'_log_filehandle'};
1287 throw(
"Unable to obtain log filehandle") unless $fh;
1294 Description : Use flock-style locks to lock log and fastforward to end.
1295 Useful
if log is being written to by multiple processes.
1301 my $fh = $self->{
'_log_filehandle'};
1302 return if -t $fh or -p $fh; # Shouldn
't lock such things
1303 flock($self->{'_log_filehandle
'},LOCK_EX) || return 0;
1304 seek($self->{'_log_filehandle
'},0,SEEK_END); # fail ok, prob not reg file
1310 Description : Unlock log previously locked by lock_log.
1317 my $fh = $self->{'_log_filehandle
'};
1318 return if -t $fh or -p $fh; # We don't lock such things
1319 # flush is implicit in flock
1320 flock($self->{
'_log_filehandle'},LOCK_UN) ||
return 0;
1326 Arg[1] : String $txt - the warning text to log
1327 Arg[2] : Int $indent - indentation level
for log message
1328 Arg[3] : Bool - add a line
break before warning
if true
1329 Example : my $log = $support->log_filehandle;
1330 $support->log_warning(
'Log foo.\n', 1);
1331 Description : Logs a message via $self->log and increases the warning counter.
1332 Return type :
true on success
1339 my ($self, $txt, $indent, $break) = @_;
1340 $txt =
"WARNING: " . $txt;
1341 $txt =
"\n$txt" if ($break);
1342 $self->log($txt, $indent);
1343 $self->{
'_warnings'}++;
1349 Arg[1] : String $txt - the error text to log
1350 Arg[2] : Int $indent - indentation level
for log message
1351 Example : my $log = $support->log_filehandle;
1352 $support->log_error(
'Log foo.\n', 1);
1353 Description : Logs a message via $self->log and exits the script.
1361 my ($self, $txt, $indent) = @_;
1362 $txt =
"ERROR: ".$txt;
1363 $self->log($txt, $indent);
1364 $self->log(
"Exiting.\n");
1370 Arg[1] : String $txt - the warning text to log
1371 Arg[2] : Int $indent - indentation level
for log message
1372 Example : my $log = $support->log_filehandle;
1373 $support->log_verbose(
'Log this verbose message.\n', 1);
1374 Description : Logs a message via $self->log
if --verbose option was used
1375 Return type : TRUE on success, FALSE
if not verbose
1382 my ($self, $txt, $indent) = @_;
1383 return(0) unless $self->param(
'verbose');
1384 $self->log($txt, $indent);
1390 Arg[1] : String $txt - the warning text to log
1391 Arg[2] : Int $indent - indentation level
for log message
1392 Example : my $log = $support->log_filehandle;
1393 $support->log_stamped(
'Log this stamped message.\n', 1);
1394 Description : Appends timestamp and memory
usage to a message and logs it via
1396 Return type : TRUE on success
1403 my ($self, $txt, $indent) = @_;
1404 # append timestamp and memory usage to log text
1406 $txt .=
" ".$self->date_and_mem.$1;
1407 $self->log($txt, $indent);
1411 =head2 log_filehandle
1413 Arg[1] : (optional) String $mode - file access mode
1414 Example : my $log = $support->log_filehandle;
1415 # print to the filehandle
1416 print $log
'Lets start logging...\n';
1417 # log via the wrapper $self->log()
1418 $support->log(
'Another log message.\n');
1419 Description : Returns a filehandle
for logging (STDERR by
default, logfile
if
1420 set from config or commandline). You can use the filehandle
1421 directly to print to, or use the smart wrapper $self->log().
1422 Logging mode (truncate or append) can be set by passing the
1423 mode as an argument to log_filehandle(), or with the
1424 --logappend commandline option (
default: truncate)
1425 Return type : Filehandle - the filehandle to log to
1426 Exceptions : thrown
if logfile can
't be opened
1431 sub log_filehandle {
1432 my ($self, $mode, $date) = @_;
1434 $mode = '>>
' if ($self->param('logappend
'));
1436 if (my $logfile = $self->param('logfile
')) {
1437 $logfile .= "_$date" if $date;
1438 if (my $logpath = $self->param('logpath
')) {
1439 unless (-e $logpath) {
1440 system("mkdir $logpath") == 0 or
1441 $self->log_error("Can't create log dir $logpath: $!\n
");
1443 $logfile = "$logpath/$logfile
";
1445 open($fh, "$mode
", $logfile) or throw(
1446 "Unable to open $logfile
for writing: $!
");
1448 $self->{'_log_filehandle'} = $fh;
1449 return $self->{'_log_filehandle'};
1454 Arg[1] : String $mode - file access mode
1455 Arg[2] : String $file - input or output file
1456 Example : my $fh = $support->filehandle('>>', '/path/to/file');
1457 # print to the filehandle
1458 print $fh 'Your text goes here...\n';
1459 Description : Returns a filehandle (*STDOUT for writing, *STDIN for reading
1460 by default) to print to or read from.
1461 Return type : Filehandle - the filehandle
1462 Exceptions : thrown if file can't be opened
1468 my ($self, $mode, $file) = @_;
1472 open($fh, "$mode
", $file) or throw(
1473 "Unable to open $file
for writing: $!
");
1474 } elsif ($mode =~ />/) {
1476 } elsif ($mode =~ /</) {
1482 =head2 init_log_date
1484 Example : $support->init_log_date;
1485 Description : Opens a filehandle to a logfile with the date in the file name
1486 Return type : Filehandle - the log filehandle
1494 my $date = $self->date;
1495 return $self->init_log($date);
1500 Example : $support->init_log;
1501 Description : Opens a filehandle to the logfile and prints some header
1502 information to this file. This includes script name, date, user
1503 running the script and parameters the script will be running
1505 Return type : Filehandle - the log filehandle
1515 # get a log filehandle
1516 my $log = $self->log_filehandle(undef,$date);
1518 # print script name, date, user who is running it
1519 unless($self->param('hideparamlist')) {
1520 my $hostname = `hostname`;
1522 my $script = "$hostname:$Bin/$Script
";
1523 my $user = `whoami`;
1525 $self->log("Script: $script\nDate:
".$self->date_and_time."\nUser: $user\n
");
1527 # print parameters the script is running with
1528 $self->log("Parameters:\n\n
");
1529 $self->log($self->list_all_params);
1531 # remember start time
1532 $self->{'_start_time'} = time;
1539 Example : $support->finish_log;
1540 Description : Writes footer information to a logfile. This includes the
1541 number of logged warnings, timestamp and memory footprint.
1542 Return type : TRUE on success
1551 unless($self->param('hideparamlist')) {
1552 $self->log("\nAll done.
".$self->warnings." warnings.
");
1553 if ($self->{'_start_time'}) {
1554 $self->log("Runtime
");
1555 my $diff = time - $self->{'_start_time'};
1556 my $sec = $diff % 60;
1557 $diff = ($diff - $sec) / 60;
1558 my $min = $diff % 60;
1559 my $hours = ($diff - $min) / 60;
1560 $self->log("${hours}h ${min}min ${sec}sec
");
1562 $self->log($self->date_and_mem."\n\n
");
1564 if($self->param('joblog')) {
1566 unless(open($fh,'>',$self->param('joblog'))) {
1567 $self->log_warning("Could not log job to
'".$self->param('joblog
').
1572 push @keys,"RUNTIME
",(time - $self->{'_start_time'});
1573 my $mem = `ps -p $$ -o vsz |tail -1`;
1575 push @keys,"MEMORY
",$mem;
1576 push @keys,"WARNINGS
",$self->warnings;
1577 push @keys,"START
",$self->{'_start_time'};
1578 push @keys,"END
",time;
1580 my ($k,$v) = splice(@keys,0,2);
1581 print $fh "$k: $v\n
";
1590 Example : print LOG "Time, memory
usage:
".$support->date_and_mem."\n
";
1591 Description : Prints a timestamp and the memory usage of your script.
1592 Return type : String - timestamp and memory usage
1599 my $date = strftime "%Y-%m-%d %T
", localtime;
1600 my $mem = `ps -p $$ -o vsz |tail -1`;
1602 return "[$date, mem $mem]
";
1607 Example : print "Date:
" . $support->date . "\n
";
1608 Description : Prints a nicely formatted datetamp (YYYY-DD-MM)
1609 Return type : String - the timestamp
1616 return strftime "%Y-%m-%d
", localtime;
1619 =head2 date_and_time
1621 Example : print "Date:
" . $support->date . "\n
";
1622 Description : Prints a nicely formatted timestamp (YYYY-DD-MM hh:mm:ss)
1623 Return type : String - the timestamp
1630 return strftime "%Y-%m-%d %T
", localtime;
1635 Example : print $support->format_time($gene->modifed_date) . "\n
";
1636 Description : Prints timestamps from the database
1637 Return type : String - nicely formatted time stamp
1645 my( $self, $time, $format ) = @_;
1646 my( $d,$m,$y) = (localtime($time))[3,4,5];
1647 my %S = ('d'=>sprintf('%02d',$d),'m'=>sprintf('%02d',$m+1),'y'=>$y+1900);
1648 (my $res = $format ) =~s/%(\w)/$S{$1}/ge;
1655 Example : print "Memory
usage:
" . $support->mem . "\n
";
1656 Description : Prints the memory used by your script. Not sure about platform
1657 dependence of this call ...
1658 Return type : String - memory usage
1665 my $mem = `ps -p $$ -o vsz |tail -1`;
1672 Arg[1] : Int $num - a number to commify
1673 Example : print "An easy to read number:
".$self->commify(100000000);
1674 # will print 100,000,000
1675 Description : put commas into a number to make it easier to read
1676 Return type : a string representing the commified number
1687 $num = reverse($num);
1688 $num =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
1690 return scalar reverse $num;
1693 =head2 fetch_non_hidden_slices
1695 Arg[1] : B::E::SliceAdaptor
1696 Arg[2] : B::E::AttributeAdaptor
1697 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
1698 Arg[4] : string $coord_system_version (optional) - 'otter' by default
1699 Example : $chroms = $support->fetch_non_hidden_slice($sa,$aa);
1700 Description : retrieve all slices from a loutre database that don't have a hidden attribute.
1701 Doesn't retrieve non-reference slices
1702 Return type : arrayref
1708 sub fetch_non_hidden_slices {
1710 my $aa = shift or throw("You must supply an attribute adaptor
");
1711 my $sa = shift or throw("You must supply a slice adaptor
");
1712 my $cs = shift || 'chromosome';
1713 my $cv = shift || 'Otter';
1715 foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
1716 my $chrom_name = $chrom->name;
1717 my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
1718 if ( scalar(@$attribs) > 1 ) {
1719 $self->log_warning("More than one hidden attribute
for chromosome $chrom_name\n
");
1721 elsif ($attribs->[0]->value == 0) {
1722 push @$visible_chroms, $chrom;
1724 elsif ($attribs->[0]->value == 1) {
1725 $self->log_verbose("chromosome $chrom_name is hidden\n
");
1728 $self->log_warning("No hidden attribute
for chromosome $chrom_name\n
");
1731 return $visible_chroms;
1734 =head2 get_non_hidden_slice_names
1736 Arg[1] : B::E::SliceAdaptor
1737 Arg[2] : B::E::AttributeAdaptor
1738 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
1739 Arg[4] : string $coord_system_version (optional) - 'otter' by default
1740 Example : $chrom_names = $support->get_non_hidden_slice_names($sa,$aa);
1741 Description : retrieve names of all slices from a loutre database that don't have a hidden attribute.
1742 Doesn't retrieve non-reference slices
1743 Return type : arrayref of names of all non-hidden slices
1749 sub get_non_hidden_slice_names {
1751 my $aa = shift or throw("You must supply an attribute adaptor
");
1752 my $sa = shift or throw("You must supply a slice adaptor
");
1753 my $cs = shift || 'chromosome';
1754 my $cv = shift || 'Otter';
1755 my $visible_chrom_names;
1756 foreach my $chrom ( @{$sa->fetch_all($cs,$cv)} ) {
1757 my $chrom_name = $chrom->seq_region_name;
1758 my $attribs = $aa->fetch_all_by_Slice($chrom,'hidden');
1759 if ( scalar(@$attribs) > 1 ) {
1760 $self->log_warning("More than one hidden attribute
for chromosome $chrom_name\n
");
1762 elsif ($attribs->[0]->value == 0) {
1763 push @$visible_chrom_names, $chrom_name;
1765 elsif ($attribs->[0]->value == 1) {
1766 $self->log_verbose("chromosome $chrom_name is hidden\n
");
1769 $self->log_warning("No hidden attribute
for chromosome $chrom_name\n
");
1772 return $visible_chrom_names;
1776 =head2 get_wanted_chromosomes
1778 Arg[1] : B::E::SliceAdaptor
1779 Arg[2] : B::E::AttributeAdaptor
1780 Arg[3] : string $coord_system_name (optional) - 'chromosome' by default
1781 Arg[4] : string $coord_system_version (optional) - 'otter' by default
1782 Example : $chr_names = $support->get_wanted_chromosomes($laa,$lsa);
1783 Description : retrieve names of slices from a lutra database that are ready for dumping to Vega.
1784 Deals with list of names to ignore (ignore_chr = LIST)
1785 Return type : arrayref of slices
1791 sub get_wanted_chromosomes {
1793 my $aa = shift or throw("You must supply an attribute adaptor
");
1794 my $sa = shift or throw("You must supply a slice adaptor
");
1795 my $cs = shift || 'chromosome';
1796 my $cv = shift || 'Otter';
1797 my $export_mode = $self->param('release_type');
1798 my $release = $self->param('vega_release');
1800 my $chroms = $self->fetch_non_hidden_slices($aa,$sa,$cs,$cv);
1802 foreach my $chrom (@$chroms) {
1803 my $attribs = $aa->fetch_all_by_Slice($chrom);
1804 my $vals = $self->get_attrib_values($attribs,'vega_export_mod');
1805 if (scalar(@$vals > 1)) {
1806 $self->log_warning ("Multiple attribs
for \
'vega_export_mod\', please fix before continuing");
1809 next CHROM if (! grep { $_ eq $export_mode} @$vals);
1810 $vals = $self->get_attrib_values($attribs,'vega_release
',$release);
1811 if (scalar(@$vals > 1)) {
1812 $self->log_warning ("Multiple attribs for \'vega_release\' value = $release , please fix before continuing");
1815 next CHROM if (! grep { $_ eq $release} @$vals);
1816 my $name = $chrom->seq_region_name;
1817 if (my @ignored = $self->param('ignore_chr
')) {
1818 next CHROM if (grep {$_ eq $name} @ignored);
1820 push @{$names}, $name;
1827 Arg[1] : B::E::Slice
1828 Arg[2]: : B::E::DBAdaptor (optional, if you don't supply one then the *first* one you generated is returned, which may or may not be what you want!)
1829 Description : Is the slice a Vega haplotype? At the moment
this is
1830 implemented by testing
for presence of vega_ref_chrom but non_ref
1831 which is correct in practice, but really misses the prupose of
1832 vega_ref_chrom, so
this might bite us
if that changes.
1833 Return type :
boolean
1838 my ($self,$slice,$dba) = @_;
1840 $dba ||= $self->dba;
1841 my $aa = $dba->get_adaptor(
'Attribute');
1843 my $attribs = $aa->fetch_all_by_Slice($slice);
1844 return (@{$self->get_attrib_values($attribs,
'vega_ref_chrom')} and
1845 @{$self->get_attrib_values($attribs,
'non_ref',1)});
1848 =head2 get_unique_genes
1850 Arg[1] : B::E::Slice
1851 Arg[2] : B::E::DBAdaptor (optional,
if you don
't supply one then the *first* one you generated is returned, which may or may not be what you want!)
1852 Example : $genes = $support->get_unique_genes($slice,$dba);
1853 Description : Retrieve genes that are only on the slice itself - used for human where assembly patches
1854 are in the assembly_exception table. Needs the PATCHes to have 'non_ref
' seq_region_attributes.
1855 Return type : arrayref of genes
1861 sub get_unique_genes {
1863 my ($slice,$dba) = @_;
1864 $slice or throw("You must supply a slice");
1865 $dba ||= $self->dba;
1867 my $sa = $dba->get_adaptor('Slice
');
1868 my $ga = $dba->get_adaptor('Gene
');
1871 if ( ! $slice->is_reference() and ! $self->is_haplotype($slice,$dba) ) {
1874 my $slices = $sa->fetch_by_region_unique( $slice->coord_system_name(),$slice->seq_region_name(),undef,undef,undef,$slice->coord_system()->version() );
1875 foreach my $slice ( @{$slices} ) {
1876 push @$genes,@{$ga->fetch_all_by_Slice($slice)};
1877 # my $start = $slice->start;
1881 $genes = $ga->fetch_all_by_Slice($slice);
1883 return ($genes, $patch);
1888 =head2 get_attrib_values
1890 Arg[1] : Arrayref of B::E::Attributes
1891 Arg[2] : 'code
' to search for
1892 Arg[3] : 'value
' to search for (optional)
1893 Example : my $c = $self->get_attrib_values($attribs,'name
'));
1894 Description : (i) In the absence of an attribute value argument, examines an arrayref
1895 of B::E::Attributes for a particular attribute type, returning the values
1896 for each attribute of that type. Can therefore be used to test for the
1897 number of attributes of that type.
1898 (ii) In the presence of the optional value argument it returns all
1899 attributes with that value ie can be used to test for the presence of an
1900 attribute with that particular value.
1901 Return type : arrayref of values for that attribute
1907 sub get_attrib_values {
1909 my $attribs = shift;
1912 if (my @atts = grep {$_->code eq $code } @$attribs) {
1915 if (my @values = grep {$_->value eq $value} @atts) {
1917 push @$r, $_->value;
1927 push @$r, $_->value;
1937 =head2 fix_attrib_value
1939 Arg[1] : Arrayref of existing B::E::Attributes
1940 Arg[2] : dbID of object
1941 Arg[3] : name of object (just for reporting)
1942 Arg[4] : attrib_type.code
1943 Arg[5] : attrib_type.value
1944 Arg[6] : interactive ? (0 by default)
1946 Example : $support->fix_attrib_value($attribs,$chr_id,$chr_name,'vega_export_mod
','N
',1);
1947 Description : adds a new attribute to an object, or updates an existing attribute with a new value
1948 Can be run in interactive or non-interactive mode (default)
1949 Return type : arrayref of results
1951 Status : only ever tested with seq_region_attributes to date
1955 sub fix_attrib_value {
1957 my $attribs = shift;
1962 my $interact = shift || 0;
1963 my $table = shift || 'seq_region_attrib
';
1965 #transiently set interactive parameter to zero
1968 $int_before = $self->param('interactive
');
1969 $self->param('interactive
',0);
1972 #get any existing value(s) for this attribute
1973 my $existings = $self->get_attrib_values($attribs,$code);
1975 #add a new attribute if there is none...
1976 if (! @$existings ) {
1977 if ($self->user_proceed("Do you want to set $name attrib (code = $code) to value $value ?")) {
1978 my $r = $self->store_new_attribute($id,$code,$value);
1980 #reset interactive parameter
1981 $self->param('interactive
',$int_before) if (! $interact);
1985 #...warn and exit if you're trying to update more than one value
for the same attribute...
1986 elsif (scalar @$existings > 1) {
1987 $self->log_warning(
"You shouldn't be trying to update multiple attributes with the same code at once ($name:$code,$value), looks like you have duplicate entries in the (seq_region_)attrib table\n");
1991 #...or update an attribute with new values...
1993 my $existing = $existings->[0];
1994 if ($existing ne $value) {
1995 if ($self->user_proceed(
"Do you want to reset $name attrib (code = $code) from $existing to $value ?")) {
1996 my $r = $self->update_attribute($id,$code,$value);
1997 $self->param(
'interactive',$int_before) if (! $interact);
1998 push @$r, $existing;
2002 #...or make no change
2004 $self->param(
'interactive',$int_before)
if (! $interact);
2010 =head2 _get_attrib_id
2012 Arg[1] : attrib_type.code
2013 Arg[2] : database handle
2014 Example : $self->_get_attrib_id(
'name',$dbh)
2015 Description : get attrib_type.attrib_type_id from a attrib_type.code
2016 Return type : attrib_type.attrib_type_id
2022 sub _get_attrib_id {
2024 my $attrib_code = shift;
2026 my ($attrib_id) = $dbh->selectrow_array(
2027 qq(select attrib_type_id
2034 $self->log_warning(
"There is no attrib_type_id for code $attrib_code, please patch the attrib_table\n");
2042 =head2 store_new_attribute
2044 Arg[1] : seq_region.seq_region_id
2045 Arg[2] : attrib_type.code
2046 Arg[3] : attrib_type.value
2047 ARG[4] : table to update (seq_region_attribute by
default)
2048 Example : $support->store_new_attribute(23,name,5);
2049 Description : uses MySQL to store an entry (code and value) in an attribute table
2050 (seq_region_attrib by
default)
2051 Return type : array_ref
2057 sub store_new_attribute {
2060 my $attrib_code = shift;
2061 my $attrib_value = shift ||
'';
2062 my $table = shift ||
'seq_region_attrib';
2064 #get database handle
2065 my $dbh = $self->get_dbconnection(
'loutre');
2066 #get attrib_type_id for this particular attribute
2067 my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
2070 qq(insert into $table
2073 ($sr_id,$attrib_id,$attrib_value)
2075 return [
'Stored',$r];
2078 =head2 update_attribute
2080 Arg[1] : seq_region.seq_region_id
2081 Arg[2] : attrib_type.code
2082 Arg[3] : attrib_type.value
2083 ARG[4] : table to update (seq_region_attribute by
default)
2084 Example : $support->update_attribute(23,name,5);
2085 Description : uses MySQL to update an attribute table (seq_region_attrib by
default)
2086 Return type : array_ref
2092 sub update_attribute {
2095 my $attrib_code = shift;
2096 my $attrib_value = shift;
2097 my $table = shift ||
'seq_region_attrib';
2098 my $dbh = $self->get_dbconnection(
'loutre');
2099 my $attrib_id = $self->_get_attrib_id($attrib_code,$dbh);
2104 where seq_region_id = $sr_id
2105 and attrib_type_id = $attrib_id),
2109 return [
'Updated',$r];
2113 =head2 remove_duplicate_attribs
2117 Example : $support->remove_duplicate_attribs($dbh,
'gene');
2118 Description : uses MySQL to remove duplicate entries from an attribute table
2125 sub remove_duplicate_attribs {
2129 $dbh->do(qq(create table nondup_${table}_attrib like ${table}_attrib));
2130 $dbh->do(qq(insert into nondup_${table}_attrib (select ${table}_id, attrib_type_id, value from ${table}_attrib group by ${table}_id, attrib_type_id, value)));
2131 $dbh->do(qq(
delete from ${table}_attrib));
2132 $dbh->do(qq(insert into ${table}_attrib (select ${table}_id, attrib_type_id, value from nondup_${table}_attrib)));
2133 $dbh->do(qq(drop table nondup_${table}_attrib));
2138 Arg[1] : string (sequence to save)
2139 Example : $support->save_seq(
'ACGT')
2140 Description : creates a temporary file containing the sequence you give it
2141 Return type : string (filename)
2149 my $content = shift ;
2150 my $seq_file = $self->param(
'logpath') .
'/SEQ_' . time() . int(rand()*100000000) . $$;
2151 open (my $fh,
">$seq_file", $seq_file) or die(
"Cannot create working file.$!");
2157 =head2 get_alignment
2159 Arg[1] : string (first sequence)
2160 Arg[1] : string (second sequence)
2161 Arg[1] : string (sequence type))
2162 Example : $support->get_alignment(
'AAAAA',
'CCCCCCC',
'DNA')
2163 Description : creates a temporary file containing the sequence you give it
2164 Return type : string (filename)
2172 my $ext_seq = shift ||
return undef;
2173 my $int_seq = shift ||
return undef;
2174 $int_seq =~ s/<br \/>
2175 my $seq_type = shift ||
return undef;
2178 # To stop box running out of memory - put an upper limit on the size of sequence
2179 # that alignview can handle
2180 if (length $int_seq > 1e6 || length $ext_seq > 1e6) {
2181 $self->log_error(
'Cannot align if sequence > 1 Mbase');
2184 my $int_seq_file = $self->save_seq($int_seq);
2185 my $ext_seq_file = $self->save_seq($ext_seq);
2187 my $label_width =
'22'; # width of column
for e!
object label
2188 my $output_width = 61; # width of alignment
2189 my $dnaAlignExe =
'/localsw/bin/emboss/bin/matcher -asequence %s -bsequence %s -outfile %s';
2190 my $pepAlignExe =
'/localsw/bin/wise2/bin/psw -dymem explicit -m /localsw/bin/wise2/wisecfg/blosum62.bla %s %s -n %s -w %s > %s';
2192 my $out_file = time() . int(rand()*100000000) . $$;
2193 $out_file = $self->param(
'logpath').
'/' . $out_file .
'.out';
2197 if ($seq_type eq
'DNA') {
2198 $command = sprintf $dnaAlignExe, $int_seq_file, $ext_seq_file, $out_file;
2200 unless (open($fh,
"<", $out_file)) {
2201 $command = sprintf $dnaAlignExe, $int_seq_file, $ext_seq_file, $out_file;
2205 elsif ($seq_type eq
'PEP') {
2206 $command = sprintf $pepAlignExe, $int_seq_file, $ext_seq_file, $label_width, $output_width, $out_file;
2208 unless (open($fh,
"<", $out_file)) {
2209 $self->log_warning(
"Cannot open alignment file\n");
2221 |Commandline #matcher
2226 |Align_format #matcher
2227 |Report_file #matcher
2232 $alignment =~ s/\n+$
2234 unlink $int_seq_file;
2235 unlink $ext_seq_file;
2239 sub allowed_duplicate_regions {
2242 # set up lists of vega names of wanted haplotype / strains
2246 '6' =>
'28000000:34000000',
2256 '19' =>
'54020000:54910000',
2257 '19-PGF_1' =>
'all',
2258 '19-PGF_2' =>
'all',
2259 '19-COX_1' =>
'all',
2260 '19-COX_2' =>
'all',
2269 '1' =>
'60564732:63711641',
2270 '1-Idd5.1_DIL' =>
'all',
2271 '1-Idd5.1_CHO' =>
'all',
2274 '1' =>
'65533102:69307244',
2275 '1-Idd5.3_DIL' =>
'all',
2278 '1' =>
'130232728:130661594',
2279 '1-Idd5.4_DIL' =>
'all',
2282 '3' =>
'36492618:37600833',
2283 '3-Idd3_DIL' =>
'all',
2284 '3-Idd3_129' =>
'all',
2287 '3' =>
'99848826:101467080',
2288 '3-Idd10_DIL' =>
'all',
2291 '3' =>
'109144756:109930492',
2292 '3-Idd18.1_DIL' =>
'all',
2295 '3' =>
'103489414:104054885',
2296 '3-Idd18.2_DIL' =>
'all',
2299 '4' =>
'128371876:131853368',
2300 '4-Idd9.1_DIL' =>
'all',
2303 '4' =>
'134841437:135252443',
2304 '4-Idd9.1M_DIL' =>
'all',
2307 '4' =>
'146049976:149895141',
2308 '4-Idd9.2_DIL' =>
'all',
2311 '4' =>
'149556939:151385803',
2312 '4-Idd9.3_DIL' =>
'all',
2315 '6' =>
'143550839:149565172',
2316 '6-Idd6.1_2_CHO' =>
'all',
2319 '6' =>
'129593784:131241919',
2320 '6-Idd6.AM_CHO' =>
'all',
2323 '11' =>
'69890356:71340035',
2324 '11-Idd4.1_DIL' =>
'all'
2327 '11' =>
'72734492:74404570',
2328 '11-Idd4.2_DIL' =>
'all',
2331 '11' =>
'86785996:90007691',
2332 '11-Idd4.2Q_CHO' =>
'all',
2335 '17' =>
'27302611:29220265',
2336 '17-Idd16.1_CHO' =>
'all',
2339 '17' =>
'33567721:38721962',
2340 '17-Idd1_CHO' =>
'all',
2341 '17-Idd1_DIL' =>
'all',
2346 '7' =>
'24728583:29807435',