apple / tensorflow_macos

TensorFlow for macOS 11.0+ accelerated using Apple's ML Compute framework.

[Op:PadGrad] gradients missing/broken in ML Compute?

justinalsing opened this issue · comments

When trying to take gradients through the model below, I get an error suggesting that the gradient kernel for the Pad operation (PadGrad) may be missing from ML Compute (or otherwise broken).

Here's the error:

AbortedError: Compute: Operation received an exception: Compute: No MLCTrainingGraph has been found. [Op:PadGrad]

Which arises when taking gradients through this model (works fine without ML Compute):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors


class AutoregressiveNeuralSplineFlow(tf.Module):
    
    def __init__(self, nbins=32, ndim=3, nconditional=3, nhidden=[10, 10], activation=tf.tanh, base_loc=0., base_scale=0.25, spline_min=-1., spline_range=2.):
        
        super().__init__()
        
        # number of spline bins
        self._nbins = nbins
        
        # density and conditional dimensions
        self._ndim = ndim
        self._nconditional = nconditional
        
        # hidden units and activation function
        self._nhidden = nhidden
        self._activation = activation
        
        # loc and scale for the (normal) base density, spline set-up
        self._base_loc = base_loc
        self._base_scale = base_scale
        self._spline_min = spline_min
        self._spline_range = spline_range
        
        # conditional autoregressive network parameterizing the bin widths
        self._bin_widths_ = tfb.AutoregressiveNetwork(params=self._nbins, 
                                                     event_shape=self._ndim, 
                                                     conditional=True, 
                                                     conditional_event_shape=self._nconditional,
                                                     hidden_units=self._nhidden,
                                                     activation=self._activation)

        # conditional autoregressive network parameterizing the bin heights
        self._bin_heights_ = tfb.AutoregressiveNetwork(params=self._nbins, 
                                                     event_shape=self._ndim, 
                                                     conditional=True, 
                                                     conditional_event_shape=self._nconditional,
                                                     hidden_units=self._nhidden,
                                                     activation=self._activation)
        
        # conditional autoregressive network parameterizing the slopes
        self._knot_slopes_ = tfb.AutoregressiveNetwork(params=self._nbins-1, 
                                                     event_shape=self._ndim, 
                                                     conditional=True, 
                                                     conditional_event_shape=self._nconditional,
                                                     hidden_units=self._nhidden,
                                                     activation=self._activation)

    # softmax the bin widths
    def bin_widths(self, x, y):
        
        return tf.math.softmax(self._bin_widths_(x, conditional_input=y), axis=-1) * (self._spline_range - self._nbins * 1e-2) + 1e-2
    
    # softmax the bin heights
    def bin_heights(self, x, y):
        
        return tf.math.softmax(self._bin_heights_(x, conditional_input=y), axis=-1) * (self._spline_range - self._nbins * 1e-2) + 1e-2

    # softplus the knot slopes
    def knot_slopes(self, x, y):
        
        return tf.math.softplus(self._knot_slopes_(x, conditional_input=y)) + 1e-2

    # construct spline bijector given inputs x and conditional inputs y
    def spline(self, x, y):
   
        return tfb.RationalQuadraticSpline(
            bin_widths=self.bin_widths(x, y),
            bin_heights=self.bin_heights(x, y),
            knot_slopes=self.knot_slopes(x, y),
            range_min=self._spline_min)

    # construct transformed distribution given inputs x and conditional inputs y
    def __call__(self, x, y):
        
        return tfd.TransformedDistribution(tfd.Normal(loc=self._base_loc, scale=self._base_scale), bijector=self.spline(x, y))
    
    # log probability for inputs x and conditionals y
    def log_prob(self, x, y):
        
        return tf.math.reduce_sum(self.__call__(x, y).log_prob(x), axis=-1)

I can call the model fine; the error only occurs when taking gradients. Any help would be much appreciated.
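For what it's worth, the Pad op in the graph most likely comes from inside `tfb.RationalQuadraticSpline`, which pads its cumulative bin widths/heights when constructing the knots (that's an inference from the op name, not something the error confirms). The failure can probably be reproduced without TensorFlow Probability at all, by running `tf.pad` under a `tf.GradientTape` so the backward pass needs the PadGrad kernel. A minimal sketch (works on stock TensorFlow; on the ML Compute build it should hit the same `[Op:PadGrad]` error if the kernel is indeed missing):

```python
import tensorflow as tf

# Isolate the suspected op: differentiate through tf.pad, whose
# backward pass is implemented by the PadGrad kernel.
x = tf.constant([1.0, 2.0, 3.0])
with tf.GradientTape() as tape:
    tape.watch(x)
    # pad one element on each side, as the spline bijector does internally
    y = tf.pad(x, paddings=[[1, 1]], constant_values=0.5)
    loss = tf.reduce_sum(y * y)

# gradient of sum(pad(x)**2) w.r.t. x is 2*x -> [2., 4., 6.]
grad = tape.gradient(loss, x)
print(grad)
```

If this snippet alone raises the `No MLCTrainingGraph has been found. [Op:PadGrad]` error, that would confirm the problem is the PadGrad gradient itself rather than anything specific to the flow model.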