Simplified box boundaries do not broadcast in Agent
DJCordhose opened this issue · comments
Describe the bug
When using arrays with identical values for a box space in an environment, an internal optimization reduces them to a single scalar; code that later relies on broadcasting then fails in the actor and value networks.
E.g. when the lower bounds of a box are all zeros (not that uncommon), such as [0. 0. 0. 0. ...], they are reduced to a single 0 in the converted environment.
Once you try training with such an environment, you will get a stack trace like the one below.
Stack Trace
ValueError Traceback (most recent call last)
<timed exec> in <module>
~\Development\easyagents\easyagents\agents.py in train(self, callbacks, num_iterations, num_episodes_per_iteration, max_steps_per_episode, num_epochs_per_iteration, num_iterations_between_eval, num_episodes_per_eval, learning_rate, train_context, default_plots)
352 train_context.learning_rate = learning_rate
353
--> 354 super().train(train_context=train_context, callbacks=callbacks, default_plots=default_plots)
355 return train_context
356
~\Development\easyagents\easyagents\agents.py in train(self, train_context, callbacks, default_plots)
140 callbacks = [callbacks]
141 callbacks = self._prepare_callbacks(callbacks, default_plots, [plot.Loss(), plot.Steps(), plot.Rewards()])
--> 142 self._backend_agent.train(train_context=train_context, callbacks=callbacks)
143
144
~\Development\easyagents\easyagents\backends\core.py in train(self, train_context, callbacks)
360 monitor._MonitorEnv._register_backend_agent(self)
361 self._on_train_begin()
--> 362 self.train_implementation(self._agent_context.train)
363 self._on_train_end()
364 finally:
~\Development\easyagents\easyagents\backends\tfagents.py in train_implementation(self, train_context)
237 tf_agent = ppo_agent.PPOAgent(timestep_spec, action_spec, optimizer,
238 actor_net=actor_net, value_net=value_net,
--> 239 num_epochs=tc.num_epochs_per_iteration)
240 self.log_api('tf_agent.initialize()')
241 tf_agent.initialize()
~\AppData\Roaming\Python\Python37\site-packages\gin\config.py in wrapper(*args, **kwargs)
1030 scope_info = " in scope '{}'".format(scope_str) if scope_str else ''
1031 err_str = err_str.format(name, fn, scope_info)
-> 1032 utils.augment_exception_message_and_reraise(e, err_str)
1033
1034 return wrapper
~\AppData\Roaming\Python\Python37\site-packages\gin\utils.py in augment_exception_message_and_reraise(exception, message)
47 if six.PY3:
48 ExceptionProxy.__qualname__ = type(exception).__qualname__
---> 49 six.raise_from(proxy.with_traceback(exception.__traceback__), None)
50 else:
51 six.reraise(proxy, None, sys.exc_info()[2])
c:\users\olive\anaconda3\envs\ea\lib\site-packages\six.py in raise_from(value, from_value)
~\AppData\Roaming\Python\Python37\site-packages\gin\config.py in wrapper(*args, **kwargs)
1007
1008 try:
-> 1009 return fn(*new_args, **new_kwargs)
1010 except Exception as e: # pylint: disable=broad-except
1011 err_str = ''
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tf_agents\agents\ppo\ppo_agent.py in __init__(self, time_step_spec, action_spec, optimizer, actor_net, value_net, importance_ratio_clipping, lambda_value, discount_factor, entropy_regularization, policy_l2_reg, value_function_l2_reg, value_pred_loss_coef, num_epochs, use_gae, use_td_lambda_return, normalize_rewards, reward_norm_clipping, normalize_observations, log_prob_clipping, kl_cutoff_factor, kl_cutoff_coef, initial_adaptive_kl_beta, adaptive_kl_target, adaptive_kl_tolerance, gradient_clipping, check_numerics, debug_summaries, summarize_grads_and_vars, train_step_counter, name)
243 observation_normalizer=self._observation_normalizer,
244 clip=False,
--> 245 collect=False))
246
247 collect_policy = ppo_policy.PPOPolicy(
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tf_agents\agents\ppo\ppo_policy.py in __init__(self, time_step_spec, action_spec, actor_network, value_network, observation_normalizer, clip, collect)
85 actor_network=actor_network,
86 observation_normalizer=observation_normalizer,
---> 87 clip=clip)
88
89 self._collect = collect
~\AppData\Roaming\Python\Python37\site-packages\gin\config.py in wrapper(*args, **kwargs)
1030 scope_info = " in scope '{}'".format(scope_str) if scope_str else ''
1031 err_str = err_str.format(name, fn, scope_info)
-> 1032 utils.augment_exception_message_and_reraise(e, err_str)
1033
1034 return wrapper
~\AppData\Roaming\Python\Python37\site-packages\gin\utils.py in augment_exception_message_and_reraise(exception, message)
47 if six.PY3:
48 ExceptionProxy.__qualname__ = type(exception).__qualname__
---> 49 six.raise_from(proxy.with_traceback(exception.__traceback__), None)
50 else:
51 six.reraise(proxy, None, sys.exc_info()[2])
c:\users\olive\anaconda3\envs\ea\lib\site-packages\six.py in raise_from(value, from_value)
~\AppData\Roaming\Python\Python37\site-packages\gin\config.py in wrapper(*args, **kwargs)
1007
1008 try:
-> 1009 return fn(*new_args, **new_kwargs)
1010 except Exception as e: # pylint: disable=broad-except
1011 err_str = ''
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tf_agents\policies\actor_policy.py in __init__(self, time_step_spec, action_spec, actor_network, info_spec, observation_normalizer, clip, name)
67 raise ValueError('actor_network must be a network.Network. Found '
68 '{}.'.format(type(actor_network)))
---> 69 actor_network.create_variables()
70 self._actor_network = actor_network
71 self._observation_normalizer = observation_normalizer
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tf_agents\networks\network.py in create_variables(self, **kwargs)
143 if not self.built:
144 random_input = tensor_spec.sample_spec_nest(
--> 145 self.input_tensor_spec, outer_dims=(0,))
146 random_state = tensor_spec.sample_spec_nest(
147 self.state_spec, outer_dims=(0,))
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tf_agents\specs\tensor_spec.py in sample_spec_nest(structure, seed, outer_dims)
343 raise TypeError("Spec type not supported: '{}'".format(spec))
344
--> 345 return tf.nest.map_structure(sample_fn, structure)
346
347
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tensorflow_core\python\util\nest.py in map_structure(func, *structure, **kwargs)
534
535 return pack_sequence_as(
--> 536 structure[0], [func(*x) for x in entries],
537 expand_composites=expand_composites)
538
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tensorflow_core\python\util\nest.py in <listcomp>(.0)
534
535 return pack_sequence_as(
--> 536 structure[0], [func(*x) for x in entries],
537 expand_composites=expand_composites)
538
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tf_agents\specs\tensor_spec.py in sample_fn(spec)
339 spec = BoundedTensorSpec.from_spec(spec)
340 return sample_bounded_spec(
--> 341 spec, outer_dims=outer_dims, seed=seed_stream())
342 else:
343 raise TypeError("Spec type not supported: '{}'".format(spec))
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tf_agents\specs\tensor_spec.py in sample_bounded_spec(spec, seed, outer_dims)
249 maxval=maxval,
250 dtype=sampling_dtype,
--> 251 seed=seed)
252 else:
253 minval = minval.item(0) if minval.ndim != 0 else minval
c:\users\olive\anaconda3\envs\ea\lib\site-packages\tf_agents\specs\tensor_spec.py in _random_uniform_int(shape, outer_dims, minval, maxval, dtype, seed)
161 # behavior.
162 # However +1 could cause overflow, in such cases we use the original maxval.
--> 163 maxval = np.broadcast_to(maxval, minval.shape).astype(dtype.as_numpy_dtype)
164 minval = np.broadcast_to(minval, maxval.shape).astype(dtype.as_numpy_dtype)
165
<__array_function__ internals> in broadcast_to(*args, **kwargs)
c:\users\olive\anaconda3\envs\ea\lib\site-packages\numpy\lib\stride_tricks.py in broadcast_to(array, shape, subok)
180 [1, 2, 3]])
181 """
--> 182 return _broadcast_to(array, shape, subok=subok, readonly=True)
183
184
c:\users\olive\anaconda3\envs\ea\lib\site-packages\numpy\lib\stride_tricks.py in _broadcast_to(array, shape, subok, readonly)
118 array = np.array(array, copy=False, subok=subok)
119 if not shape and array.shape:
--> 120 raise ValueError('cannot broadcast a non-scalar to a scalar array')
121 if any(size < 0 for size in shape):
122 raise ValueError('all elements of broadcast shape must be non-'
ValueError: cannot broadcast a non-scalar to a scalar array
In call to configurable 'ActorPolicy' (<function ActorPolicy.__init__ at 0x0000022E3670AB88>)
In call to configurable 'PPOAgent' (<function PPOAgent.__init__ at 0x0000022E3676B3A8>)