9 A Hive-specific URL parser.
13 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
14 Copyright [2016-2022] EMBL-European Bioinformatics Institute
16 Licensed under the Apache License,
Version 2.0 (the
"License"); you may not use
this file except in compliance with the License.
17 You may obtain a copy of the License at
21 Unless required by applicable law or agreed to in writing, software distributed under the License
22 is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 See the License
for the specific language governing permissions and limitations under the License.
27 Please subscribe to the
Hive mailing list: http:
32 package Bio::EnsEMBL::Hive::Utils::URL;
40 my $url = shift @_ or
return;
42 my ($old_parse, $new_parse,
43 $dbconn_part, $url_parts_hash, $table_name, $tparam_name, $tparam_value, $conn_param_string, $query_part);
45 # In case the whole URL is quoted (should we do this with double-quotes too ?) 46 if( $url=~/^
'(.*)'$/ ) {
53 'unambig_url' =>
':///',
54 'query_params' => {
'object_type' =>
'Analysis',
'logic_name' => $url, },
59 # Perform environment variable substitution separately with and without curly braces. 60 # Make sure expressions stay as they were if we were unable to substitute them. 62 $url =~ s/\$(?|\{(\w+)\}|(\w+))/defined($ENV{$1})?
"$ENV{$1}":
"\$$1"/eg;
64 if( ($dbconn_part, @$url_parts_hash{
'driver',
'user',
'pass',
'host',
'port',
'dbname'}, $table_name, $tparam_name, $tparam_value, $conn_param_string) =
67 my ($dummy, %conn_params) = split(/[;=]/, $conn_param_string
69 my $exception_from_OLD_format;
72 if($table_name eq
'analysis') {
73 $query_params->{
'object_type'} =
'Analysis';
74 $query_params->{$tparam_name} = $tparam_value; # $tparam_name is
'logic_name' or
'dbID', $tparam_value is the analysis_name or dbID
75 } elsif($table_name eq
'accu') {
76 $query_params->{
'object_type'} =
'Accumulator';
77 $query_params->{
'accu_name'} = $tparam_name;
78 $query_params->{
'accu_address'} = $tparam_value;
79 } elsif($table_name eq
'job') {
80 die
"Jobs cannot yet be located by URLs, sorry";
82 $query_params->{
'object_type'} =
'NakedTable';
83 $query_params->{
'table_name'} = $table_name;
85 if( $tparam_name eq
'insertion_method' ) { # extra hint on the OLD format from the insertion_method
86 $query_params->{
'insertion_method'} = $tparam_value;
87 } elsif( $tparam_name eq
'table_name' ) { # hinting
this is NEW format with a bipartite dbpath
88 $exception_from_OLD_format = 1;
94 if($exception_from_OLD_format) {
95 warn
"\nOLD URL parser thinks you are using the NEW URL syntax for a remote $query_params->{'object_type'}, so skipping it (it may be wrong!)\n";
97 my $unambig_url = hash_to_unambig_url( $url_parts_hash );
100 'dbconn_part' => $dbconn_part,
102 'conn_params' => \%conn_params,
103 'query_params' => $query_params,
104 'unambig_url' => $unambig_url,
109 if( ($dbconn_part, @$url_parts_hash{
'driver',
'user',
'pass',
'host',
'port',
'dbname'}, $query_part, $conn_param_string) =
112 my ($dummy, %conn_params) = split(/[;=]/, $conn_param_string
113 my $query_params = $query_part ? { split(/[&=]/, $query_part ) } : undef;
114 my $exception_from_NEW_format;
116 my ($driver, $dbname) = @$url_parts_hash{
'driver',
'dbname'};
118 if(!$query_params and ($driver eq
'mysql' or $driver eq
'pgsql') and $dbname and $dbname=~m{/}) { # a special
case of multipart dbpath hints at the OLD format (or none at all)
120 $query_params = {
'object_type' =>
'NakedTable' };
121 $exception_from_NEW_format = 1;
123 } elsif($query_params and not (my $object_type = $query_params->{
'object_type'})) { #
do a bit of guesswork:
125 if($query_params->{
'logic_name'}) {
126 $object_type =
'Analysis';
127 if($dbname and $dbname=~m{^([/~\w\-\.]*)/analysis$}) {
128 $exception_from_NEW_format = 1;
130 } elsif($query_params->{
'job_id'}) {
131 $object_type =
'AnalysisJob';
132 } elsif($query_params->{
'semaphore_id'}) {
133 $object_type =
'Semaphore';
134 } elsif($query_params->{
'accu_name'}) { # we don
't require $query_params->{'accu_address
'} to support scalar accu 136 } elsif($query_params->{'table_name
'}) { # NB: the order is important here, in case table_name is reset for non-NakedTables 138 } elsif($query_params->{'insertion_method
'}) { 140 if($dbname and $dbname=~m{^([/~\w\-\.]*)/(\w+)$}) { 141 $exception_from_NEW_format = 1; 145 $query_params->{'object_type
'} = $object_type; 148 if($exception_from_NEW_format) { 149 warn "\nNEW URL parser thinks you are using the OLD URL syntax for a remote $query_params->{'object_type
'}, so skipping it (it may be wrong!)\n"; 151 my $unambig_url = hash_to_unambig_url( $url_parts_hash ); 154 'dbconn_part
' => $dbconn_part, 156 'conn_params
' => \%conn_params, 157 'query_params
' => $query_params, 158 'unambig_url
' => $unambig_url, 165 if($new_parse and $old_parse) { 166 if(stringify($old_parse) eq stringify($new_parse)) { 169 warn "\nThe URL '$url
' can be parsed ambiguously:\n\t".stringify($old_parse)."\nvs\n\t".stringify($new_parse)."\n). Using the OLD parser at the moment.\nPlease change your URL to match the new format if you see weird behaviour\n\n"; 172 } elsif($new_parse) { 174 } elsif($old_parse) { 175 warn "\nThe URL '$url
' only works with the old parser, please start using the new syntax as the old parser will soon be deprecated\n\n"; 178 warn "\nThe URL '$url
' could not be parsed, please check it\n"; 184 =head2 hash_to_unambig_url 186 Arg [1] : a hash describing (at least) db connection parameters 187 Example : my $unambig_url = hash_to_unambig_url( $url_parts_hash ); 188 Description : Generates a degenerate URL that omits unnecessary parts (password, default port numbers) 189 : but tries to uniquely represent a connection. 190 Returntype : a string 194 sub hash_to_unambig_url { 195 my $url_parts_hash = shift @_; # expected to contain the parts from which to build a URL 197 my $driver = $url_parts_hash->{'driver
'} // ''; 198 my $unambig_port = $url_parts_hash->{'port
'} // { 'mysql
' => 3306, 'pgsql
' => 5432, 'sqlite
' => '' }->{$driver} // ''; 199 my $unambig_host = ( $url_parts_hash->{'host
'} // '' ) eq 'localhost
' ? '127.0.0.1
' : ( $url_parts_hash->{'host
'} // '' ); 200 my $unambig_url = $driver .':
201 . $unambig_host . ( $unambig_port ?
':'.$unambig_port :
'') .
'/'. ( $url_parts_hash->{
'dbname'}
209 Arg [1] : a hash describing a db connection, or accumulator, as generated by parse_url
210 Example : my $parse = parse_url($url1); my $url2 = hash_to_url($parse);
211 Description : Generates a
"new-style" URL from a hash containing the parse of a
URL 212 : (old or
new style). In cases where a trailing slash is optional, it leaves
213 : off the trailing slash
214 Returntype : a
URL as a
string 221 my $location_part =
'';
222 if ($parse->{
'driver'}) {
223 $location_part = join(
'',
226 $parse->{
'user'} ? $parse->{
'user'}.($parse->{
'pass'} ?
':' . $parse->{
'pass'} :
'').
'@' :
'',
227 $parse->{
'host'} ? $parse->{
'host'}.($parse->{
'port'} ?
':' . $parse->{
'port'} :
'') :
'',
234 my $qp_hash = \%{ $parse->{
'query_params'} };
235 my $object_type =
delete $qp_hash->{
'object_type'}
236 my $query_params_part =
237 (($object_type eq
'Analysis') && !$location_part)
238 ? $qp_hash->{
'logic_name'}
239 : (($object_type eq
'AnalysisJob') && !$location_part)
240 ? $qp_hash->{
'job_id'}
242 ?
'?' . join(
'&', map { $_.
'='.$qp_hash->{$_} } keys %$qp_hash)
245 # DBC extra arguments' part: 246 my $cp_hash = $parse->{
'conn_params'} || {};
247 my $conn_params_part = keys %$cp_hash
248 ?
';' . join(
';', map { $_.
'='.$cp_hash->{$_} } keys %$cp_hash)
251 my $url = $location_part . $query_params_part . $conn_params_part;
257 =head2 hide_url_password
259 Description : Check the command-line
for -url or -pipeline_url in order to
260 replace the password with an environment variable and then
261 exec on the
new arguments (in which
case the
function doesn
't 263 Returntype : void or no return 267 sub hide_url_password { 269 # avoid calling exec whilst in the Perl debugger not to confuse the latter 270 # Detect the presence of the debugger with https://www.nntp.perl.org/group/perl.debugger/2004/11/msg55.html 271 return if defined &DB::DB; 273 # Safeguard to avoid attempting the substitution twice 274 # NOTE: the environment is propagated to the children, meaning that the 275 # variable, once set by beekeeper.pl, will extend to all its workers, 276 # which is fine as long as beekeeper protects the passwords. 277 return if $ENV{EHIVE_SANITIZED_ARGS}; 278 $ENV{EHIVE_SANITIZED_ARGS} = 1; 280 # Work on a copy of @ARGV 284 # Scan the list of arguments 287 # Search for -url and -pipeline_url (with one or two hyphens) 288 if (($a =~ /^--?(pipeline_)?url$/) and @args) { 289 my $url = shift @args; 290 # Does the next value look like a proper URL ? 291 if ($url =~ /^(.*:\/\/\w*:)([^\/\@]*)(\@.*)$/) { 293 my $driver_and_user = $1; 294 my $possible_password = $2; 295 my $url_remainder = $3; 296 # Does the password look like an environment variable ? 297 if ($possible_password =~ /\$(?|\{(\w+)\}|(\w+))/) { 298 # Does the variable exist ? 299 if (defined($ENV{$1})) { 300 # Already a substituted password -> bail out ! 304 # Perform the substitution 305 my $pass_variable = '_EHIVE_HIDDEN_PASS
'; 306 $ENV{$pass_variable} = $possible_password; 307 # Single quotes are needed so that LSF doesn't expand the variable
308 $url = q{
'} . $driver_and_user .'${
'.$pass_variable . '}
' . $url_remainder . q{'};
310 # Found the URL, let's push the remaining arguments and exec 311 push @new_args, $a, $url, @args;
312 exec($^X, $0, @new_args);
314 } elsif ($a eq
'--') {
315 # We've reached the end of the parsable options without finding 316 # a password -> nothing to do 323 # If we arrive here it means we couldn't find anything to substitute, 324 # so there is nothing to do