keras-rl / keras-rl

Deep Reinforcement Learning for Keras.

Home Page: http://keras-rl.readthedocs.io/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

DDPG worked well, but not CDQN or NAF!

B-Yassine opened this issue · comments

I tried DDPG and everything worked well. Now I am trying your NAF example model on my custom environment, and I am getting this error:

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
~\.conda\envs\sim\lib\site-packages\tensorflow\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1625   try:
-> 1626     c_op = c_api.TF_FinishOperation(op_desc)
   1627   except errors.InvalidArgumentError as e:

InvalidArgumentError: Dimension 1 in both shapes must be equal, but are 1 and 171. Shapes are [?,1] and [?,171].
	From merging shape 0 with other shapes. for 'naf_layer_1/concat/concat_dim' (op: 'Pack') with input shapes: [?,1], [?,171].

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-9-c87ee2027050> in <module>
      7                  memory=memory, nb_steps_warmup=100, random_process=random_process,
      8                  gamma=.99, target_model_update=1e-3, processor=processor)
----> 9 agent.compile(Adam(lr=1e-5, clipnorm=1.), metrics=['mae'])
     10 
     11 # Okay, now it's time to learn something! We visualize the training here for show, but this

~\.conda\envs\sim\lib\site-packages\rl\agents\dqn.py in compile(self, optimizer, metrics)
    613 
    614                 mu_out = self.mu_model(os_in)
--> 615                 A_out = NAFLayer(self.nb_actions, mode=self.covariance_mode)([L_out, mu_out, a_in])
    616                 combined_out = Lambda(lambda x: x[0]+x[1], output_shape=lambda x: x[0])([A_out, V_out])
    617                 combined = Model(inputs=[a_in] + os_in, outputs=[combined_out])

~\.conda\envs\sim\lib\site-packages\keras\engine\base_layer.py in __call__(self, inputs, **kwargs)
    455             # Actually call the layer,
    456             # collecting output(s), mask(s), and shape(s).
--> 457             output = self.call(inputs, **kwargs)
    458             output_mask = self.compute_mask(inputs, previous_mask)
    459 

~\.conda\envs\sim\lib\site-packages\rl\agents\dqn.py in call(self, x, mask)
    431                                 try:
    432                                         # Old TF behavior.
--> 433                                         L_flat = tf.concat(1, [zeros, L_flat])
    434                                 except TypeError:
    435                                         # New TF behavior

~\.conda\envs\sim\lib\site-packages\tensorflow\python\ops\array_ops.py in concat(values, axis, name)
   1119       ops.convert_to_tensor(
   1120           axis, name="concat_dim",
-> 1121           dtype=dtypes.int32).get_shape().assert_is_compatible_with(
   1122               tensor_shape.scalar())
   1123       return identity(values[0], name=scope)

~\.conda\envs\sim\lib\site-packages\tensorflow\python\framework\ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
   1046       name=name,
   1047       preferred_dtype=preferred_dtype,
-> 1048       as_ref=False)
   1049 
   1050 

~\.conda\envs\sim\lib\site-packages\tensorflow\python\framework\ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
   1142 
   1143     if ret is None:
-> 1144       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1145 
   1146     if ret is NotImplemented:

~\.conda\envs\sim\lib\site-packages\tensorflow\python\ops\array_ops.py in _autopacking_conversion_function(v, dtype, name, as_ref)
    969   elif dtype != inferred_dtype:
    970     v = nest.map_structure(_cast_nested_seqs_to_dtype(dtype), v)
--> 971   return _autopacking_helper(v, dtype, name or "packed")
    972 
    973 

~\.conda\envs\sim\lib\site-packages\tensorflow\python\ops\array_ops.py in _autopacking_helper(list_or_tuple, dtype, name)
    921           elems_as_tensors.append(
    922               constant_op.constant(elem, dtype=dtype, name=str(i)))
--> 923       return gen_array_ops.pack(elems_as_tensors, name=scope)
    924     else:
    925       return converted_elems

~\.conda\envs\sim\lib\site-packages\tensorflow\python\ops\gen_array_ops.py in pack(values, axis, name)
   4687     axis = _execute.make_int(axis, "axis")
   4688     _, _, _op = _op_def_lib._apply_op_helper(
-> 4689         "Pack", values=values, axis=axis, name=name)
   4690     _result = _op.outputs[:]
   4691     _inputs_flat = _op.inputs

~\.conda\envs\sim\lib\site-packages\tensorflow\python\framework\op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    785         op = g.create_op(op_type_name, inputs, output_types, name=scope,
    786                          input_types=input_types, attrs=attr_protos,
--> 787                          op_def=op_def)
    788       return output_structure, op_def.is_stateful, op
    789 

~\.conda\envs\sim\lib\site-packages\tensorflow\python\util\deprecation.py in new_func(*args, **kwargs)
    486                 'in a future version' if date is None else ('after %s' % date),
    487                 instructions)
--> 488       return func(*args, **kwargs)
    489     return tf_decorator.make_decorator(func, new_func, 'deprecated',
    490                                        _add_deprecated_arg_notice_to_docstring(

~\.conda\envs\sim\lib\site-packages\tensorflow\python\framework\ops.py in create_op(***failed resolving arguments***)
   3270           input_types=input_types,
   3271           original_op=self._default_original_op,
-> 3272           op_def=op_def)
   3273       self._create_op_helper(ret, compute_device=compute_device)
   3274     return ret

~\.conda\envs\sim\lib\site-packages\tensorflow\python\framework\ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
   1788           op_def, inputs, node_def.attr)
   1789       self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1790                                 control_input_ops)
   1791 
   1792     # Initialize self._outputs.

~\.conda\envs\sim\lib\site-packages\tensorflow\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
   1627   except errors.InvalidArgumentError as e:
   1628     # Convert to ValueError for backwards compatibility.
-> 1629     raise ValueError(str(e))
   1630 
   1631   return c_op

ValueError: Dimension 1 in both shapes must be equal, but are 1 and 171. Shapes are [?,1] and [?,171].
	From merging shape 0 with other shapes. for 'naf_layer_1/concat/concat_dim' (op: 'Pack') with input shapes: [?,1], [?,171].
  1. Keras version: 2.2.4
  2. TensorFlow version: 1.11.0

I also tried changing the model from the Sequential API to the functional API, and I still get exactly the same error!

P.S. 171 comes from:

# Number of elements in a triangular matrix.
nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

since my nb_actions = 18, which gives (18 * 18 + 18) // 2 = 171.

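I solved it by swapping the order in which the old and new TF concat call signatures are tried. With TensorFlow 1.11, the old-style call raises a ValueError (as in the traceback above) rather than a TypeError, so the `except TypeError` fallback to the new-style call is never reached. I changed this: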

try:
  # Old TF behavior.
  L_flat = tf.concat(1, [zeros, L_flat])
  except TypeError:
  # New TF behavior
  L_flat = tf.concat([zeros, L_flat], 1)

To this:

try:
    # New TF behavior
    L_flat = tf.concat([zeros, L_flat], 1)
    except TypeError:
    # Old TF behavior.
    L_flat = tf.concat(1, [zeros, L_flat])