ensembl-hive-python3  2.7.0
params.py
Go to the documentation of this file.
1 
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 """
18 This module is an implementation of eHive's Param module.
19 It defines ParamContainer which is an attribute of BaseRunnable
20 and not its base class as in eHive's class hierarchy.
21 All the specific warnings and exceptions inherit from ParamWarning
22 and ParamException.
23 """
24 
25 import collections
26 import numbers
27 import unittest
28 
29 
class ParamWarning(Warning):
    """Used by process.BaseRunnable"""
33 
34 
class ParamException(Exception):
    """Base class for parameters-related exceptions"""


class ParamNameException(ParamException):
    """Raised when the parameter name is not a string"""

    def __str__(self):
        # args[0] is the offending parameter name, whatever its type
        return '"{0}" (type {1}) is not a valid parameter name'.format(self.args[0], type(self.args[0]).__name__)


class ParamSubstitutionException(ParamException):
    """Raised when ParamContainer tried to substitute an unexpected structure (only dictionaries and lists are accepted)"""

    def __str__(self):
        # args[0] is the structure that could not be substituted
        return 'Cannot substitute elements in objects of type "{0}"'.format(str(type(self.args[0])))


class ParamInfiniteLoopException(ParamException):
    """Raised when parameters depend on each other, forming a loop"""

    def __str__(self):
        # args[0] is the reference on which the loop was detected,
        # args[1] is the mapping of the substitutions that were in progress
        return "Substitution loop has been detected on {0}. Parameter-substitution stack: {1}".format(self.args[0], list(self.args[1].keys()))


class NullParamException(ParamException):
    """Raised when a parameter cannot be required because it is null (None)"""

    def __str__(self):
        # args[0] is the name of the parameter
        return "{0} is None".format(self.args[0])
54 
55 
class ParamContainer:
    """Equivalent of eHive's Param module

    The container keeps the parameters as given to the constructor
    ("unsubstituted") next to a cache of the values that have been set
    explicitly or already substituted. Substitution replaces #param_name#,
    #func:param_name# and #expr( ... )expr# references found in strings,
    including strings nested in lists and dictionaries.
    """

    def __init__(self, unsubstituted_params, debug=False):
        """Constructor. "unsubstituted_params" is a dictionary"""
        # Shallow copy: the caller's dictionary is never modified
        self.unsubstituted_param_hash = unsubstituted_params.copy()
        self.param_hash = {}
        self.debug = debug
        # References being substituted, used to detect loops. The public entry
        # points replace it on each call; it is also created here so that the
        # attribute always exists.
        self.substitution_in_progress = collections.OrderedDict()


    # Public methods


    def set_param(self, param_name, value):
        """Setter. Returns the new value"""
        self.validate_parameter_name(param_name)
        self.param_hash[param_name] = value
        return value

    def get_param(self, param_name):
        """Getter. Performs the parameter substitution

        Raises ParamNameException if "param_name" is not a valid name, and
        KeyError if the parameter is not defined.
        """
        self.validate_parameter_name(param_name)
        self.substitution_in_progress = collections.OrderedDict()
        try:
            return self.internal_get_param(param_name)
        except (KeyError, SyntaxError, ParamException) as e:
            # To hide the part of the stack that is in ParamContainer
            raise e.with_traceback(None)

    def has_param(self, param_name):
        """Returns a boolean. It checks both substituted and unsubstituted parameters"""
        self.validate_parameter_name(param_name)
        return (param_name in self.param_hash) or (param_name in self.unsubstituted_param_hash)

    def substitute_string(self, string):
        """Apply the parameter substitution to the string"""
        self.substitution_in_progress = collections.OrderedDict()
        try:
            return self.param_substitute(string)
        except (KeyError, SyntaxError, ParamException) as e:
            # To hide the part of the stack that is in ParamContainer
            raise e.with_traceback(None)

    # Private methods

    def validate_parameter_name(self, param_name):
        """Raises ParamNameException unless "param_name" is a non-empty string"""
        if not isinstance(param_name, str) or (param_name == ''):
            raise ParamNameException(param_name)

    def debug_print(self, *args, **kwargs):
        """Print debug information if the debug flag is turned on (cf constructor)"""
        if self.debug:
            print(*args, **kwargs)

    def internal_get_param(self, param_name):
        """Equivalent of get_param() that assumes "param_name" is a valid parameter name and hence, doesn't have to raise ParamNameException.
        It is only used internally"""
        self.debug_print("internal_get_param", param_name)
        if param_name not in self.param_hash:
            # First access: substitute the raw value and cache the result.
            # A missing parameter raises KeyError here.
            x = self.unsubstituted_param_hash[param_name]
            self.param_hash[param_name] = self.param_substitute(x)
        return self.param_hash[param_name]


    def param_substitute(self, structure):
        """
        Take any structure and replace the pairs of hashes with the values of the parameters / expression they represent
        Compatible types: numbers, strings, lists, dictionaries (otherwise, ParamSubstitutionException is raised)
        """
        self.debug_print("param_substitute", structure)

        if structure is None:
            return None

        elif isinstance(structure, list):
            return [self.param_substitute(_) for _ in structure]

        elif isinstance(structure, dict):
            # NB: In Python, not everything can be hashed and used as a dictionary key.
            # Perhaps we should check for such errors ?
            return {self.param_substitute(key): self.param_substitute(value) for (key,value) in structure.items()}

        elif isinstance(structure, numbers.Number):
            return structure

        elif isinstance(structure, str):

            # We handle the substitution differently if there is a single reference as we can avoid forcing the result to be a string

            # startswith() / endswith() are used instead of indexing so that
            # the empty string is handled (it is returned unchanged by the
            # default parser below) rather than raising IndexError.
            if structure.startswith('#expr(') and structure.endswith(')expr#') and structure.count('#expr(', 6, -6) == 0 and structure.count(')expr#', 6, -6) == 0:
                return self.subst_one_hashpair(structure[1:-1], True)

            if structure.startswith('#') and structure.endswith('#') and structure.count('#', 1, -1) == 0:
                if len(structure) <= 2:
                    # "#" and "##" are literals, not references
                    return structure
                return self.subst_one_hashpair(structure[1:-1], False)

            # Fallback to the default parser: all pairs of hashes are substituted
            return self.subst_all_hashpairs(structure, lambda middle_param: self.subst_one_hashpair(middle_param, False) )

        else:
            raise ParamSubstitutionException(structure)


    def subst_all_hashpairs(self, structure, callback):
        """
        Parse "structure" and replace all the pairs of hashes by the result of calling callback() on the pair content
        #expr()expr# are treated differently by calling subst_one_hashpair()
        The result is a string (like structure)
        """
        self.debug_print("subst_all_hashpairs", structure)

        # Allow a single literal hash
        if structure.count("#") == 1:
            return structure

        result = []
        while True:
            (head, sep, tmp) = structure.partition('#')
            result.append(head)
            if sep != '#':
                # No opening hash left: everything has been consumed
                return ''.join(result)
            if tmp.startswith('expr('):
                i = tmp.find(')expr#')
                if i == -1:
                    raise SyntaxError("Unmatched '#expr(' token")
                # tmp[:i+5] is "expr( ... )expr", i.e. without the outer hashes
                val = self.subst_one_hashpair(tmp[:i+5], True)
                tail = tmp[i+6:]
            else:
                (middle_param, sep, tail) = tmp.partition('#')
                if sep != '#':
                    raise SyntaxError("Unmatched '#' token")
                if middle_param == '':
                    # Two consecutive hashes are kept verbatim
                    val = '##'
                else:
                    val = callback(middle_param)
            result.append(str(val))
            structure = tail


    def subst_one_hashpair(self, inside_hashes, is_expr):
        """
        Run the parameter substitution for a single pair of hashes.
        Here, we only need to handle #expr()expr#, #func:params# and #param_name#
        as each condition has been parsed in the other methods
        """
        self.debug_print("subst_one_hashpair", inside_hashes, is_expr)

        # Keep track of the substitutions we've made to detect loops
        if inside_hashes in self.substitution_in_progress:
            raise ParamInfiniteLoopException(inside_hashes, self.substitution_in_progress)
        self.substitution_in_progress[inside_hashes] = 1

        # NOTE(security): both branches below call eval() on text taken from
        # the parameters, which runs arbitrary Python code. Parameters must
        # come from a trusted source.
        # The eval() calls run in this method's scope (the generated code
        # refers to "self"), so they must stay in this method.

        # We ask the caller to provide the is_expr tag to avoid checking the string again for the presence of the "expr" tokens
        if is_expr:
            # Each #param# of the expression becomes a call to internal_get_param()
            s = self.subst_all_hashpairs(inside_hashes[5:-5].strip(), 'self.internal_get_param("{0}")'.format)
            val = eval(s)

        elif ':' in inside_hashes:
            (func_name,_,parameters) = inside_hashes.partition(':')
            try:
                f = eval(func_name)
            except Exception:
                # Not a bare "except": KeyboardInterrupt and SystemExit must
                # not be reported as an unknown method
                raise SyntaxError("Unknown method: " + func_name)
            if callable(f):
                if parameters:
                    val = f(self.internal_get_param(parameters))
                else:
                    val = f()
            else:
                raise SyntaxError(func_name + " is not callable")

        else:
            val = self.internal_get_param(inside_hashes)

        # Only reached on success. On error the entry is deliberately left in
        # place: ParamInfiniteLoopException holds a reference to this mapping
        # and reads it when building its message.
        del self.substitution_in_progress[inside_hashes]
        return val
234 
235 
class ParamContainerTestExceptions(unittest.TestCase):
    """Checks the exceptions raised by ParamContainer.get_param()"""

    def test_infinite_loops(self):
        # 'a' and 'b' refer to each other
        with self.assertRaises(ParamInfiniteLoopException):
            ParamContainer({'a': '#b#', 'b': '#a#'}).get_param('a')

    def test_missing_param(self):
        # 'b' is not defined
        with self.assertRaises(KeyError):
            ParamContainer({'a': 3}).get_param('b')

    def test_param_must_be_string(self):
        # 0 is not a string, hence not a valid parameter name
        with self.assertRaises(ParamNameException):
            ParamContainer({'a': 3}).get_param(0)
249 
250 
class ParamContainerTestSubstitutions(unittest.TestCase):
    """Checks the values produced by the parameter substitution"""

    # Type to clarify seed_params
    TestParamEntry = collections.namedtuple('TestParamEntry', ['name', 'seed_value', 'eval_value'])

    # Test data
    seed_params_list = (
        TestParamEntry('alpha', 2, 2),
        TestParamEntry('beta', 5, 5),
        TestParamEntry('delta', '#expr( #alpha#*#beta# )expr#', 10),
        TestParamEntry('epsilon', 'alpha#beta', 'alpha#beta'),  # Single hash -> no substitution

        TestParamEntry('gamma', [10, 20, 33, 15], [10, 20, 33, 15]),
        TestParamEntry('gamma_prime', '#expr( #gamma# )expr#', [10, 20, 33, 15]),
        TestParamEntry('gamma_second', '#expr( list(#gamma#) )expr#', [10, 20, 33, 15]),

        TestParamEntry('age', {'Alice': 17, 'Bob': 20, 'Chloe': 21}, {'Alice': 17, 'Bob': 20, 'Chloe': 21}),
        TestParamEntry('age_prime', '#expr( #age# )expr#', {'Alice': 17, 'Bob': 20, 'Chloe': 21}),
        TestParamEntry('age_second', '#expr( dict(#age#) )expr#', {'Alice': 17, 'Bob': 20, 'Chloe': 21}),

        TestParamEntry('csv', '[123,456,789]', '[123,456,789]'),
        TestParamEntry('csv_prime', '#expr( #csv# )expr#', '[123,456,789]'),
        TestParamEntry('listref', '#expr( eval(#csv#) )expr#', [123, 456, 789]),

        TestParamEntry('null', None, None),
        TestParamEntry('ref_null', '#null#', None),
        TestParamEntry('ref2_null', '#expr( #null# )expr#', None),
        TestParamEntry('ref3_null', '#alpha##null##beta#', '2None5'),
    )
    seed_params_dict = {p.name: p.seed_value for p in seed_params_list}

    def setUp(self):
        # A fresh container for every test, seeded with the test data
        self.params = ParamContainer(self.seed_params_dict)

    def assertSubstitution(self, param_string, expected_value, msg):
        """Helper method to execute the substitution and check the result"""
        value = self.params.substitute_string(param_string)
        self.assertEqual(value, expected_value, msg)

    def test_values(self):
        for p in self.seed_params_list:
            self.assertEqual(self.params.get_param(p.name), p.eval_value, p.name + " can be retrieved")

    def test_numbers(self):
        self.assertSubstitution(
            '#alpha# and another: #beta# and again one: #alpha# and the other: #beta# . Their product: #delta#',
            '2 and another: 5 and again one: 2 and the other: 5 . Their product: 10',
            'Scalar substitutions'
        )

    def test_lists(self):
        self.assertSubstitution(
            '#gamma#',
            [10, 20, 33, 15],
            'gamma not stringified'
        )
        self.assertSubstitution(
            '#expr( #gamma# )expr#',
            [10, 20, 33, 15],
            'expr-gamma not stringified'
        )
        self.assertSubstitution(
            '#expr( "~".join([str(_) for _ in sorted(#gamma#)]) )expr#',
            '10~15~20~33',
            'gamma stringification'
        )
        self.assertSubstitution(
            '#expr( "~".join([str(_) for _ in sorted(#gamma_prime#)]) )expr#',
            '10~15~20~33',
            'gamma_prime stringification'
        )

    def test_dictionaries(self):
        self.assertSubstitution(
            '#age#',
            {'Alice': 17, 'Bob': 20, 'Chloe': 21},
            'age not stringified'
        )
        self.assertSubstitution(
            '#expr( #age# )expr#',
            {'Alice': 17, 'Bob': 20, 'Chloe': 21},
            'age not stringified'
        )
        self.assertSubstitution(
            '#expr( " and ".join(["{0} is {1} years old".format(p,a) for (p,a) in sorted(#age#.items())]) )expr#',
            'Alice is 17 years old and Bob is 20 years old and Chloe is 21 years old',
            'complex fold of age'
        )
        self.assertSubstitution(
            '#expr( " and ".join(["{0} is {1} years old".format(p,a) for (p,a) in sorted(#age_prime#.items())]) )expr#',
            'Alice is 17 years old and Bob is 20 years old and Chloe is 21 years old',
            'complex fold of age_prime'
        )

    def test_maths_methods(self):
        self.assertSubstitution(
            '#expr( sum(#gamma#) )expr#',
            78,
            'sum(gamma)'
        )
        self.assertSubstitution(
            '#expr( min(#gamma#) )expr#',
            10,
            'min(gamma)'
        )
        self.assertSubstitution(
            '#expr( max(#gamma#) )expr#',
            33,
            'max(gamma)'
        )

    def test_indexes(self):
        self.assertSubstitution(
            '#expr( #age#["Alice"]+max(#gamma#)+#listref#[0] )expr#',
            173,
            'adding indexed and keyed values'
        )

    def test_param_modification(self):
        # Force the substitution of these parameters
        self.params.get_param('gamma')
        self.params.get_param('gamma_prime')
        self.params.get_param('gamma_second')
        # Modify gamma
        self.params.get_param('gamma').append("val0")
        # Only gamma and gamma_prime should be modified
        # because they are the same reference.
        # gamma_second is a copy made before the edit
        # so should still have the initial value.
        self.assertEqual(
            self.params.get_param('gamma'),
            [10, 20, 33, 15, 'val0'],
            'gamma'
        )
        self.assertEqual(
            self.params.get_param('gamma_prime'),
            [10, 20, 33, 15, 'val0'],
            'gamma_prime'
        )
        self.assertEqual(
            self.params.get_param('gamma_second'),
            [10, 20, 33, 15],
            'gamma_second'
        )
395 
eHive.params.ParamContainerTestSubstitutions.test_values
def test_values(self)
Definition: params.py:292
eHive.params.ParamContainerTestSubstitutions.test_dictionaries
def test_dictionaries(self)
Definition: params.py:325
eHive.params.ParamContainerTestSubstitutions.test_numbers
def test_numbers(self)
Definition: params.py:296
eHive.params.ParamContainer.has_param
def has_param(self, param_name)
Returns a boolean.
Definition: params.py:86
eHive.params.ParamContainerTestSubstitutions.params
params
Definition: params.py:285
eHive.params.NullParamException.__str__
def __str__(self)
Definition: params.py:52
eHive.params.ParamContainerTestExceptions.test_param_must_be_string
def test_param_must_be_string(self)
Definition: params.py:248
eHive.params.ParamContainer.set_param
def set_param(self, param_name, value)
Setter.
Definition: params.py:70
eHive.params.ParamContainerTestSubstitutions.seed_params_dict
dictionary seed_params_dict
Definition: params.py:282
eHive.params.ParamContainer.subst_one_hashpair
def subst_one_hashpair(self, inside_hashes, is_expr)
Run the parameter substitution for a single pair of hashes.
Definition: params.py:204
eHive.params.ParamContainer.param_substitute
def param_substitute(self, structure)
Take any structure and replace the pairs of hashes with the values of the parameters / expression they represent.
Definition: params.py:125
eHive.params.ParamContainer
Equivalent of eHive's Param module.
Definition: params.py:57
eHive.params.ParamContainerTestSubstitutions
Definition: params.py:253
eHive.params.ParamContainer.validate_parameter_name
def validate_parameter_name(self, param_name)
Tells whether "param_name" is a non-empty string.
Definition: params.py:102
eHive.params.ParamContainer.unsubstituted_param_hash
unsubstituted_param_hash
Definition: params.py:61
eHive.params.ParamContainerTestSubstitutions.test_param_modification
def test_param_modification(self)
Definition: params.py:371
eHive.params.ParamContainer.internal_get_param
def internal_get_param(self, param_name)
Equivalent of get_param() that assumes "param_name" is a valid parameter name and hence, doesn't have to raise ParamNameException.
Definition: params.py:113
eHive.params.ParamContainerTestSubstitutions.seed_params_list
tuple seed_params_list
Definition: params.py:259
eHive.params.ParamSubstitutionException
Raised when ParamContainer tried to substitute an unexpected structure (only dictionaries and lists are accepted)
Definition: params.py:43
eHive.params.ParamContainerTestSubstitutions.test_lists
def test_lists(self)
Definition: params.py:303
eHive.params.ParamContainerTestSubstitutions.TestParamEntry
TestParamEntry
Definition: params.py:256
eHive.params.ParamInfiniteLoopException.__str__
def __str__(self)
Definition: params.py:48
eHive.params.ParamContainer.get_param
def get_param(self, param_name)
Getter.
Definition: params.py:76
eHive.params.ParamNameException.__str__
def __str__(self)
Definition: params.py:40
eHive.params.ParamContainer.substitute_string
def substitute_string(self, string)
Apply the parameter substitution to the string.
Definition: params.py:91
eHive.params.ParamContainerTestSubstitutions.test_indexes
def test_indexes(self)
Definition: params.py:364
eHive.params.ParamContainerTestExceptions
Definition: params.py:238
eHive.params.ParamContainer.debug
debug
Definition: params.py:63
eHive.params.ParamContainer.subst_all_hashpairs
def subst_all_hashpairs(self, structure, callback)
Parse "structure" and replace all the pairs of hashes by the result of calling callback() on the pair content.
Definition: params.py:168
eHive.params.ParamContainer.substitution_in_progress
substitution_in_progress
Definition: params.py:78
eHive.params.ParamInfiniteLoopException
Raised when parameters depend on each other, forming a loop.
Definition: params.py:47
eHive.params.ParamContainer.__init__
def __init__(self, unsubstituted_params, debug=False)
Constructor.
Definition: params.py:60
eHive.params.ParamWarning
Used by process.BaseRunnable.
Definition: params.py:31
eHive.params.ParamContainerTestSubstitutions.setUp
def setUp(self)
Definition: params.py:284
eHive.params.ParamSubstitutionException.__str__
def __str__(self)
Definition: params.py:44
eHive.params.NullParamException
Raised when a parameter cannot be required because it is null (None)
Definition: params.py:51
eHive.params.ParamContainer.param_hash
param_hash
Definition: params.py:62
eHive.params.ParamContainer.debug_print
def debug_print(self, *args, **kwargs)
Print debug information if the debug flag is turned on (cf constructor)
Definition: params.py:107
eHive.params.ParamContainerTestExceptions.test_missing_param
def test_missing_param(self)
Definition: params.py:244
eHive.params.ParamContainerTestSubstitutions.assertSubstitution
def assertSubstitution(self, param_string, expected_value, msg)
Helper method to execute the substitution and check the result.
Definition: params.py:288
eHive.params.ParamNameException
Raised when the parameter name is not a string.
Definition: params.py:39
eHive.params.ParamException
Base class for parameters-related exceptions.
Definition: params.py:36
eHive.params.ParamContainerTestSubstitutions.test_maths_methods
def test_maths_methods(self)
Definition: params.py:347
eHive.params.ParamContainerTestExceptions.test_infinite_loops
def test_infinite_loops(self)
Definition: params.py:240