Trying to reuse LSTM weights in Keras from a model built in DyNet
jannickj opened this issue · comments
Hi 👋
I am currently trying to use weights from a model in dynet:
dynet model:
# DyNet side: a stacked bidirectional RNN built from LSTM layers.
dy_model = dy.BiRNNBuilder(layers, dim_input, dim_hidden, model, dy.LSTMBuilder)
Keras model:
def multi_layer_birnn(input, layer_count, dim_hidden, params):
    """Build a stacked bidirectional LSTM whose weights are frozen to
    DyNet-derived parameters.

    params: list with one entry per layer, each a dict
    {'fw': {...}, 'bw': {...}} where every direction holds 'W' (kernel),
    'U' (recurrent kernel) and 'b' (bias) arrays — assumed already
    converted to Keras layout (see the conversion snippet below).
    Returns the output tensor of the last bidirectional layer.
    """
    out = input
    # DyNet's BiRNNBuilder splits dim_hidden across the two directions,
    # so each per-direction Keras LSTM gets half of it.
    dim_hidden = int(dim_hidden / 2)
    for li in range(layer_count):
        layer_params = params[li]
        fwparams = layer_params['fw']
        bwparams = layer_params['bw']
        forward_cell = layers.LSTM(
            dim_hidden,
            return_sequences=True,
            # DyNet does not apply Keras's extra +1 forget-gate bias.
            unit_forget_bias=False,
            kernel_initializer=keras.initializers.Constant(fwparams['W']),
            recurrent_initializer=keras.initializers.Constant(fwparams['U']),
            bias_initializer=keras.initializers.Constant(fwparams['b']),
        )
        forward_cell.trainable = False
        backward_cell = layers.LSTM(
            dim_hidden,
            return_sequences=True,
            unit_forget_bias=False,
            kernel_initializer=keras.initializers.Constant(bwparams['W']),
            recurrent_initializer=keras.initializers.Constant(bwparams['U']),
            bias_initializer=keras.initializers.Constant(bwparams['b']),
            # Required: Bidirectional(backward_layer=...) expects the
            # backward layer to consume its input reversed.
            go_backwards=True,
        )
        backward_cell.trainable = False
        birnn = layers.Bidirectional(forward_cell, backward_layer=backward_cell)
        birnn.trainable = False
        out = birnn(out)
    return out


ws_rnn = multi_layer_birnn(ws_input, LAYERS, DIM_HIDDEN, params['birnn'])
To get the params out of DyNet I do the following:
# Pull the (W, U, b) parameter triples out of the DyNet BiRNN, layer by
# layer; each builder_layers entry holds a forward and a backward
# LSTMBuilder, in that order.
rnn_layers = []
for layer in dy_model.builder_layers:
    fworbw = []
    for fwbw in layer:
        # parameters_list() yields kernel, recurrent kernel, bias.
        (W, U, b) = fwbw.param_collection().parameters_list()
        b = b.as_array()
        W = W.as_array()
        U = U.as_array()
        fworbw.append({
            'U': U,
            'W': W,
            'b': b,
        })
    rnn_layers.append({
        'fw': fworbw[0],
        'bw': fworbw[1],
    })
# Bug fix: the key was written as 'birnn (missing closing quote),
# which is a SyntaxError as posted.
params = {'birnn': rnn_layers}
I've tested and made sure that the inputs to the LSTM are completely identical; however, I get different outputs from the two frameworks :(
Found the issue, so I'll just leave it here in case any poor soul has to deal with same issue:
DyNet stores its weights as [HIDDEN, INPUT],
whereas Keras stores them as [INPUT, HIDDEN].
The gate ordering also differs: DyNet uses i, f, o, c,
while Keras uses i, f, c, o.
so the code to fix it is:
def into_keras(i, f, c, o):
    """Concatenate the four gate blocks (already in Keras order
    i, f, c, o) and flip DyNet's [hidden, input] layout to Keras's
    [input, hidden]. For the 1-D bias the transpose is a no-op."""
    return np.concatenate([i, f, c, o], axis=0).T


# DyNet packs the gate blocks row-wise in the order i, f, o, c.
W_i, U_i, b_i = W[:units, :], U[:units, :], b[:units]
W_f, U_f, b_f = W[units:units * 2, :], U[units:units * 2, :], b[units:units * 2]
W_o, U_o, b_o = W[units * 2:units * 3, :], U[units * 2:units * 3, :], b[units * 2:units * 3]
W_c, U_c, b_c = W[units * 3:, :], U[units * 3:, :], b[units * 3:]

# Re-pack in the order Keras expects: i, f, c, o.
fworbw.append({
    'W': into_keras(W_i, W_f, W_c, W_o),
    'U': into_keras(U_i, U_f, U_c, U_o),
    'b': into_keras(b_i, b_f, b_c, b_o),
})