ensembl-hive  2.7.0
URI.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 
35 =head1 SYNOPSIS
36 
37  use Bio::EnsEMBL::Utils::URI qw/parse_uri is_uri/;
38  # or use Bio::EnsEMBL::Utils::URI qw/:all/; # to bring everything in
39 
40  my $db_uri = parse_uri('mysql://user@host:3157/db');
41  my $http_uri = parse_uri('http://www.google.co.uk:80/search?q=t');
42 
43  is_uri('mysql://user@host'); # returns 1
44  is_uri('file:///my/path'); # returns 1
45  is_uri('/my/path'); # returns 0
46 
47 =head1 DESCRIPTION
48 
49 This object is a generic URI parser which is primarily used in the
50 parsing of database URIs into a more managable data structure. We also provide
51 the resulting URI object
52 
53 =head1 DEPENDENCIES
54 
55 L<URI::Escape> is an optional dependency but if available the code will attempt
56 to perform URI encoding/decoding on parameters. If you do not want this
57 functionality then modify the global C<$Bio::EnsEMBL::Utils::URI::URI_ESCAPE>
58 to false;
59 
60 =head1 METHODS
61 
62 =cut
63 
64 package Bio::EnsEMBL::Utils::URI;
65 
66 use strict;
67 use warnings;
68 
69 use Scalar::Util qw/looks_like_number/;
70 use Bio::EnsEMBL::Utils::Exception qw(throw);
71 use File::Spec;
72 
73 our $URI_ESCAPE;
74 $URI_ESCAPE = 0;
75 eval {
76  require URI::Escape;
77  URI::Escape->import();
78  $URI_ESCAPE = 1;
79 };
80 
81 use base qw/Exporter/;
82 our @EXPORT_OK;
83 our %EXPORT_TAGS;
84 @EXPORT_OK = qw/parse_uri is_uri/;
85 %EXPORT_TAGS = ( all => [@EXPORT_OK] );
86 
87 ####URI Parsing
88 
89 =head2 is_uri
90 
91  Arg[1] : Scalar; URI to parse
92  Example : is_uri('mysql://user:pass@host:415/db');
93  Description : Looks for the existence of a URI scheme to decide if this
94  is a classical URI. Whilst non-scheme based URIs can still be
95  interprited it is useful to use when you need to know if
96  you are going to work with a URI or not
97  Returntype : Boolean
98  Caller : General
99  Status : Beta
100 
101 =cut
102 
103 sub is_uri {
104  my ($uri) = @_;
105  return 0 if ! $uri;
106  my $SCHEME = qr{ ([^:]*) :// }xms;
107  return ($uri =~ $SCHEME) ? 1 : 0;
108 }
109 
110 =head2 parse_uri
111 
112  Arg[1] : Scalar; URI to parse
113  Example : my $uri = parse_uri('mysql://user:pass@host:415/db');
114  Description : A URL parser which attempts to convert many different types
115  of URL into a common data structure.
116  Returntype : Bio::EnsEMBL::Utils::URI
117  Caller : General
118  Status : Beta
119 
120 =cut
121 
122 sub parse_uri {
123  my ($url) = @_;
124 
125  my $SCHEME = qr{ ([^:]*) :// }xms;
126  my $USER = qr{ ([^/:\@]+)? :? ([^/\@]+)? \@ }xms;
127  my $HOST = qr{ ([^/:]+)? :? ([^/]+)? }xms;
128  my $DB = qr{ / ([^/?]+)? /? ([^/?]+)? }xms;
129  my $PARAMS = qr{ \? (.+)}xms;
130 
131  my $p;
132 
133  if($url =~ qr{ $SCHEME ([^?]+) (?:$PARAMS)? }xms) {
134  my $scheme = $1;
135  $scheme = ($URI_ESCAPE) ? uri_unescape($scheme) : $scheme;
136  $p = Bio::EnsEMBL::Utils::URI->new($scheme);
137  my ($locator, $params) = ($2, $3);
138 
139  if($scheme eq 'file') {
140  $p->path($locator);
141  }
142  elsif($scheme eq 'sqlite') {
143  $p->path($locator);
144  }
145  else {
146  if($locator =~ s/^$USER//) {
147  $p->user($1);
148  $p->pass($2);
149  }
150  if($locator =~ s/^$HOST//) {
151  $p->host(($URI_ESCAPE) ? uri_unescape($1) : $1);
152  $p->port(($URI_ESCAPE) ? uri_unescape($2) : $2);
153  }
154 
155  if($p->is_db_scheme() || $scheme eq q{}) {
156  if($locator =~ $DB) {
157  $p->db_params()->{dbname} = ($URI_ESCAPE) ? uri_unescape($1) : $1;
158  $p->db_params()->{table} = ($URI_ESCAPE) ? uri_unescape($2) : $2;
159  }
160  }
161  else {
162  $p->path($locator);
163  }
164  }
165 
166  if(defined $params) {
167  my @kv_pairs = split(/;|&/, $params);
168  foreach my $kv_string (@kv_pairs) {
169  my ($key, $value) = map { ($URI_ESCAPE) ? uri_unescape($_) : $_ } split(/=/, $kv_string);
170  $p->add_param($key, $value);
171  }
172  }
173  }
174 
175  return $p;
176 }
177 
178 ####URI Object
179 
180 =pod
181 
182 =head2 new()
183 
184  Arg[1] : String; scheme the URI will confrom to
185  Description : New object call
186  Returntype : Bio::EnsEMBL::Utils::URIParser::URI
187  Exceptions : Thrown if scheme is undefined.
188  Status : Stable
189 
190 =cut
191 
192 sub new {
193  my ($class, $scheme) = @_;
194  $class = ref($class) || $class;
195  throw "Scheme cannot be undefined. Empty string is allowed" if ! defined $scheme;
196 
197  my $self = bless ({
198  params => {},
199  param_keys => [],
200  db_params => {},
201  scheme => $scheme,
202  }, $class);
203 
204  return $self;
205 }
206 
207 =head2 db_schemes()
208 
209  Description: Returns a hash of scheme names known to be databases
210  Returntype : HashRef
211  Exceptions : None
212  Status : Stable
213 
214 =cut
215 
216 sub db_schemes {
217  my ($self) = @_;
218  return {map { $_ => 1 } qw/mysql ODBC sqlite Oracle Sybase/};
219 }
220 
221 
222 =head2 is_db_scheme()
223 
224  Description: Returns true if the code believes the scheme to be a Database
225  Returntype : Boolean
226  Exceptions : None
227  Status : Stable
228 
229 =cut
230 
231 sub is_db_scheme {
232  my ($self) = @_;
233  return ( exists $self->db_schemes()->{$self->scheme()} ) ? 1 : 0;
234 }
235 
236 =head2 scheme()
237 
238  Description : Getter for the scheme attribute
239  Returntype : String
240  Exceptions : None
241  Status : Stable
242 
243 =cut
244 
245 sub scheme {
246  my ($self) = @_;
247  return $self->{scheme};
248 }
249 
250 =head2 path()
251 
252  Arg[1] : Setter argument
253  Description : Getter/setter for the path attribute
254  Returntype : String
255  Exceptions : None
256  Status : Stable
257 
258 =cut
259 
260 sub path {
261  my ($self, $path) = @_;
262  $self->{path} = $path if defined $path;
263  return $self->{path};
264 }
265 
266 =head2 user()
267 
268  Arg[1] : Setter argument
269  Description : Getter/setter for the user attribute
270  Returntype : String
271  Exceptions : None
272  Status : Stable
273 
274 =cut
275 
276 sub user {
277  my ($self, $user) = @_;
278  $self->{user} = $user if defined $user;
279  return $self->{user};
280 }
281 
282 =head2 pass()
283 
284  Arg[1] : Setter argument
285  Description : Getter/setter for the password attribute
286  Returntype : String
287  Exceptions : None
288  Status : Stable
289 
290 =cut
291 
292 sub pass {
293  my ($self, $pass) = @_;
294  $self->{pass} = $pass if defined $pass;
295  return $self->{pass};
296 }
297 
298 =head2 host()
299 
300  Arg[1] : Setter argument
301  Description : Getter/setter for the host attribute
302  Returntype : String
303  Exceptions : None
304  Status : Stable
305 
306 =cut
307 
308 sub host {
309  my ($self, $host) = @_;
310  $self->{host} = $host if defined $host;
311  return $self->{host};
312 }
313 
314 =head2 port()
315 
316  Arg[1] : Setter argument
317  Description : Getter/setter for the port attribute
318  Returntype : Integer
319  Exceptions : If port is not a number, less than 1 or not a whole integer
320  Status : Stable
321 
322 =cut
323 
324 sub port {
325  my ($self, $port) = @_;
326  if(defined $port) {
327  if(! looks_like_number($port) || $port < 1 || int($port) != $port) {
328  throw "Port $port is not a number, less than 1 or not a whole integer";
329  }
330  $self->{port} = $port if defined $port;
331  }
332  return $self->{port};
333 }
334 
335 =head2 param_keys()
336 
337  Description : Getter for the paramater map keys in the order they were first
338  seen. Keys should only appear once in this array
339  Returntype : ArrayRef
340  Exceptions : None
341  Status : Stable
342 
343 =cut
344 
345 sub param_keys {
346  my ($self) = @_;
347  return [@{$self->{param_keys}}];
348 }
349 
350 =head2 param_exists_ci()
351 
352  Arg[1] : String; Key
353  Description : Performs a case-insensitive search for the given key
354  Returntype : Boolean; returns true if your given key was seen
355  Exceptions : None
356  Status : Stable
357 
358 =cut
359 
360 sub param_exists_ci {
361  my ($self, $key) = @_;
362  my %keys = map { uc($_) => 1 } @{$self->param_keys()};
363  return ($keys{uc($key)}) ? 1 : 0;
364 }
365 
366 =head2 add_param()
367 
368  Arg[1] : String; key
369  Arg[1] : Scalar; value
370  Description : Add a key/value to the params map. Multiple inserts of the same
371  key is allowed
372  Returntype : None
373  Exceptions : None
374  Status : Stable
375 
376 =cut
377 
378 sub add_param {
379  my ($self, $key, $value) = @_;
380  if(!exists $self->{params}->{$key}) {
381  $self->{params}->{$key} = [];
382  push(@{$self->{param_keys}}, $key);
383  }
384  push(@{$self->{params}->{$key}}, $value);
385  return;
386 }
387 
388 =head2 get_params()
389 
390  Arg[1] : String; key
391  Description : Returns the values which were found to be linked to the given
392  key. Arrays are returned because one key can have many
393  values in a URI
394  Returntype : ArrayRef[Scalar]
395  Exceptions : None
396  Status : Stable
397 
398 =cut
399 
400 sub get_params {
401  my ($self, $key) = @_;
402  return [] if ! exists $self->{params}->{$key};
403  return [@{$self->{params}->{$key}}];
404 }
405 
406 =head2 db_params()
407 
408  Description : Storage of parameters used only for database URIs since
409  they require
410  Returntype : HashRef; Database name is keyed under C<dbname> and the
411  table is keyed under C<table>
412  Exceptions : None
413  Status : Stable
414 
415 =cut
416 
417 sub db_params {
418  my ($self) = @_;
419  return $self->{db_params};
420 }
421 
422 =head2 generate_dbsql_params()
423 
424  Arg[1] : boolean $no_table alows you to avoid pushing -TABLE as an
425  output value
426  Description : Generates a Hash of Ensembl compatible parameters to be used
427  to construct a DB object. We combine those parameters
428  which are deemed to be part of the C<db_params()> method
429  under C<-DBNAME> and C<-TABLE>. We also search for a number
430  of optional parameters which are lowercased equivalents
431  of the construction parameters available from a
432  L<Bio::EnsEMBL::DBSQL::DBAdaptor>,
433  L<Bio::EnsEMBL::DBSQL::DBConnection> as well as C<verbose>
434  being supported.
435 
436  We also convert the scheme type into the driver attribute
437 
438  Returntype : Hash (not a reference). Output can be put into a C<DBConnection>
439  constructor.
440  Exceptions : None
441  Status : Stable
442 
443 =cut
444 
445 sub generate_dbsql_params {
446  my ($self, $no_table) = @_;
447  my %db_params;
448 
449  $db_params{-DRIVER} = $self->scheme();
450  $db_params{-HOST} = $self->host() if $self->host();
451  $db_params{-PORT} = $self->port() if $self->port();
452  $db_params{-USER} = $self->user() if $self->user();
453  $db_params{-PASS} = $self->pass() if $self->pass();
454 
455  my $dbname;
456  my $table;
457  if($self->scheme() eq 'sqlite') {
458  ($dbname, $table) = $self->_decode_sqlite();
459  }
460  else {
461  $dbname = $self->db_params()->{dbname};
462  $table = $self->db_params()->{table};
463  }
464 
465  $db_params{-DBNAME} = $dbname if $dbname;
466  $db_params{-TABLE} = $table if ! $no_table && $table;
467 
468  foreach my $boolean_param (qw/disconnect_when_inactive reconnect_when_connection_lost is_multispecies no_cache verbose/) {
469  if($self->param_exists_ci($boolean_param)) {
470  $db_params{q{-}.uc($boolean_param)} = 1;
471  }
472  }
473  foreach my $value_param (qw/species group species_id wait_timeout/) {
474  if($self->param_exists_ci($value_param)) {
475  $db_params{q{-}.uc($value_param)} = $self->get_params($value_param)->[0];
476  }
477  }
478 
479  return %db_params;
480 }
481 
482 =head2 _decode_sqlite
483 
484  Description : Performs path gymnastics to decode into a number of possible
485  options. The issue with SQLite is that the normal URI scheme
486  looks like sqlite:
487  that the DB name is C</my/path.sqlite> and the table is
488  C<table>?
489 
490  The code takes a path, looks for the full path & if it cannot
491  be found looks for the file a directory back. In the above
492  example it would have looked for C</my/path.sqlite/table>,
493  found it to be non-existant, looked for C</my/path.sqlite>
494  and found it.
495 
496  If the path splitting procdure resulted in just 1 file after
497  the first existence check e.g. C<sqlite://db.sqlite> it assumes
498  that should be the name. If no file can be found we default to
499  the full length path.
500  Caller : internal
501 
502 =cut
503 
504 sub _decode_sqlite {
505  my ($self) = @_;
506  my $dbname;
507  my $table;
508  my $path = $self->path();
509  if(-f $path) {
510  $dbname = $path;
511  }
512  else {
513  my ($volume, $directories, $file) = File::Spec->splitpath($path);
514  my @splitdirs = File::Spec->splitdir($directories);
515  if(@splitdirs == 1) {
516  $dbname = $path;
517  }
518  else {
519  my $new_file = pop(@splitdirs);
520  $new_file ||= q{};
521  my $new_path = File::Spec->catpath($volume, File::Spec->catdir(@splitdirs), $new_file);
522  if($new_path ne File::Spec->rootdir() && -f $new_path) {
523  $dbname = $new_path;
524  $table = $file;
525  }
526  else {
527  $dbname = $path;
528  }
529  }
530  }
531 
532  $self->db_params()->{dbname} = $dbname if $dbname;
533  $self->db_params()->{table} = $table if $table;
534 
535  return ($dbname, $table);
536 }
537 
538 =head2 generate_uri()
539 
540  Description : Generates a URI string from the paramaters in this object
541  Returntype : String
542  Exceptions : None
543  Status : Stable
544 
545 =cut
546 
547 sub generate_uri {
548  my ($self) = @_;
549  my $scheme = sprintf('%s://', ($URI_ESCAPE) ? uri_escape($self->scheme()) : $self->scheme());
550  my $user_credentials = q{};
551  my $host_credentials = q{};
552  my $location = q{};
553 
554  if($self->user() || $self->pass()) {
555  my $user = $self->user();
556  my $pass = $self->pass();
557  if($URI_ESCAPE) {
558  $user = uri_escape($user) if $user;
559  $pass = uri_escape($pass) if $pass;
560  }
561  $user_credentials = sprintf('%s%s@',
562  ( $user ? $user : q{} ),
563  ( $pass ? q{:}.$pass : q{} )
564  );
565  }
566 
567  if($self->host() || $self->port()) {
568  my $host = $self->host();
569  my $port = $self->port();
570  if($URI_ESCAPE) {
571  $host = uri_escape($host) if $host;
572  $port = uri_escape($port) if $port;
573  }
574  $host_credentials = sprintf('%s%s',
575  ( $host ? $host : q{} ),
576  ( $port ? q{:}.$port : q{} )
577  );
578  }
579 
580  if($self->is_db_scheme() || $self->scheme() eq '') {
581  if($self->scheme() eq 'sqlite') {
582  if(! $self->path()) {
583  my $tmp_loc = $self->db_params()->{dbname};
584  throw "There is no dbname available" unless $tmp_loc;
585  $tmp_loc .= q{/}.$self->db_params()->{table} if $self->db_params()->{table};
586  $self->path($tmp_loc);
587  }
588  $location = $self->path();
589  }
590  else {
591  my $dbname = $self->db_params()->{dbname};
592  my $table = $self->db_params()->{table};
593  if($dbname || $table) {
594  if($URI_ESCAPE) {
595  $dbname = uri_escape($dbname) if $dbname;
596  $table = uri_escape($table) if $table;
597  }
598  $location = sprintf('/%s%s',
599  ($dbname ? $dbname : q{}),
600  ($table ? q{/}.$table : q{})
601  );
602  }
603  }
604  }
605  else {
606  $location = $self->path() if $self->path();
607  }
608 
609  my $param_string = q{};
610  if(@{$self->param_keys()}) {
611  $param_string = q{?};
612  my @params;
613  foreach my $key (@{$self->param_keys}) {
614  my $values_array = $self->get_params($key);
615  foreach my $value (@{$values_array}) {
616  my $encoded_key = ($URI_ESCAPE) ? uri_escape($key) : $key;
617  my $encoded_value = ($URI_ESCAPE) ? uri_escape($value) : $value;
618  push(@params, ($encoded_value) ? "$encoded_key=$encoded_value" : $encoded_key);
619  }
620  }
621  $param_string .= join(q{;}, @params);
622  }
623 
624  return join(q{}, $scheme, $user_credentials, $host_credentials, $location, $param_string);
625 }
626 
627 1;
EnsEMBL
Definition: Filter.pm:1
map
public map()
Bio::EnsEMBL::Utils::URI
Definition: URI.pm:28
Bio::EnsEMBL::Utils::URI::new
public Bio::EnsEMBL::Utils::URIParser::URI new()
Bio::EnsEMBL::Utils::Scalar
Definition: Scalar.pm:66
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68