Does the sequence of datasets need to shuffle? In the code, shuffle is set False
zsrluminous opened this issue · comments
Does the order of the dataset need to be shuffled? In the code, shuffle is set to False.
Hi, if you look at the class of GroupGenerator (
triq/src/train/group_generator.py
Line 12 in 2dbcdf8
OK, I see, thanks. I have another question:
I built my own dataset, which contains only MOS scores, and I want to train a model to predict the MOS score.
In the code, do I only need to set 'n_quality_levels' = 1?
in the below evaluation part,
self.mos_scales = np.array([1, 2, 3, 4, 5])
callbacks\evaluation_callback_generator.py
def evaluation(self, iq_generator):
def evaluation(self, iq_generator):
prediction.append(np.sum(np.multiply(self.mos_scales, prediction_batch[i,:])))
scores.append(np.sum(np.multiply(self.mos_scales, scores_batch[:, np.newaxis][i, :])))
Does the code in these places need to be revised?
OK, I see, thanks. I have another question: I built my own dataset, which contains only MOS scores, and I want to train a model to predict the MOS score. In the code, do I only need to set 'n_quality_levels' = 1?
in the below evaluation part, self.mos_scales = np.array([1, 2, 3, 4, 5])
callbacks\evaluation_callback_generator.py
def evaluation(self, iq_generator): def evaluation(self, iq_generator): prediction.append(np.sum(np.multiply(self.mos_scales, prediction_batch[i,:]))) scores.append(np.sum(np.multiply(self.mos_scales, scores_batch[:, np.newaxis][i, :])))
Does the code in these places need to be revised?
No, you don't need to change that. This is controlled by the argument "using_single_mos". If you set n_quality_levels =1, then using_single_mos will be automatically set to True.
OK, after training the model for several epochs, the results are as follows.
During training:
predictions is [25.93677520751953, 29.13662552833557, 28.822256326675415, 28.979650139808655, 28.93711247467, 28.575502038002014, 28.48992347717285, 28.37987780570984, 28.48713219165802, ...]
mos_scores is [17.0835, 40.0005, 60.0, 67.083, 40.0005, 60.0, 67.917, 60.0, 67.083, 69.58349999999999,000000004, 23.334000000000003, 23.334000000000003, 67.083, 23.334000000000003, 40.0005, 67.083, 17.083835, 17.0835, 17.0835, 62.916000000000004, 67.083, 17.0835, 60.0, ...]
in the predict process:
the test image is sample_data/example_image_2 (mos=2.865).jpg
prediction[0] is [1.3698878]
Predicted MOS: 20.548317432403564
The MOS range does not seem to be [1, 5] but rather [15, 75]. Is that because the prediction is multiplied by self.mos_scales?
Is this MOS range correct?
Please show me your complete training scripts and some examples of your MOS values.
The code I changed is as follows:
def train_main(args):
    """Train a TRIQ model and optionally run a second, low-learning-rate fine-tuning stage.

    The function builds and compiles the model, loads pretrained weights by
    name, creates the training (and optional validation) generators, fits the
    model, and keeps only the best weights file in the result folder.

    :param args: dict of training options. Keys read by this function:
        ``multi_gpu`` (0 selects the single GPU ``gpu``; > 0 uses a
        ``MirroredStrategy``), ``gpu``, ``result_folder``,
        ``n_quality_levels`` (> 1 trains on a score distribution with
        categorical cross-entropy, otherwise on a single MOS with MSE),
        ``lr_base``, ``image_aug``, ``backbone``, ``weights``,
        ``koniq_mos_file``, ``live_mos_file``, ``train_folders``,
        ``val_folders`` (may be ``None``), ``batch_size``, ``lr_schedule``,
        ``epochs``, ``initial_epoch`` and ``do_finetune``.
    :return: None. Results are written into a sub-folder of ``result_folder``
        named after the model configuration.
    """
    if args['multi_gpu'] == 0:
        gpus = tf.config.experimental.list_physical_devices('GPU')
        print(f"gpus is {gpus}")
        tf.config.experimental.set_visible_devices(gpus[args['gpu']], 'GPU')

    result_folder = args['result_folder']
    model_name = 'triq_conv2D_all'

    # Define loss function according to prediction objective (score distribution or MOS)
    metrics = None
    if args['n_quality_levels'] > 1:
        using_single_mos = False
        loss = 'categorical_crossentropy'
        model_name += '_distribution'
    else:
        using_single_mos = True
        loss = 'mse'
        model_name += '_mos'

    if args['lr_base'] < 1e-4 / 2:
        model_name += '_finetune'
    if not args['image_aug']:
        model_name += '_no_imageaug'

    optimizer = Adam(args['lr_base'])

    # Keep the model head consistent with the loss chosen above: a single
    # output for MOS regression, otherwise one output per quality level.
    n_outputs = 1 if using_single_mos else args['n_quality_levels']

    def build_compiled_model():
        """Create the TRIQ model and compile it with the selected loss."""
        triq_model = create_triq_model(n_quality_levels=n_outputs,
                                       input_shape=(None, None, 3),
                                       backbone=args['backbone'],
                                       maximum_position_encoding=193)
        triq_model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])
        return triq_model

    if args['multi_gpu'] > 0:
        strategy = tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # Everything that creates variables should be under the strategy scope.
            # In general this is only model construction & `compile()`.
            model = build_compiled_model()
    else:
        model = build_compiled_model()

    print('Load ImageNet weights')
    model.load_weights(args['weights'], by_name=True)
    imagenet_pretrain = True

    # Define train and validation data
    image_scores = get_image_scores(args['koniq_mos_file'], args['live_mos_file'],
                                    using_single_mos=using_single_mos)
    train_image_file_groups, train_score_groups = get_image_score_from_groups(args['train_folders'], image_scores)
    train_generator = GroupGenerator(train_image_file_groups,
                                     train_score_groups,
                                     batch_size=args['batch_size'],
                                     image_aug=args['image_aug'],
                                     imagenet_pretrain=imagenet_pretrain)
    train_steps = len(train_generator)

    if args['val_folders'] is not None:
        test_image_file_groups, test_score_groups = get_image_score_from_groups(args['val_folders'], image_scores)
        validation_generator = GroupGenerator(test_image_file_groups,
                                              test_score_groups,
                                              batch_size=args['batch_size'],
                                              image_aug=False,
                                              imagenet_pretrain=imagenet_pretrain)
        validation_steps = len(validation_generator)
        evaluation_callback = ModelEvaluationIQGenerator(validation_generator,
                                                         using_single_mos,
                                                         evaluation_generator=None)
    else:
        evaluation_callback = None
        validation_generator = None
        validation_steps = 0

    result_folder = os.path.join(result_folder, model_name)
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(result_folder, exist_ok=True)

    # Create callbacks including evaluation and learning rate scheduler
    callbacks = create_callbacks(model_name,
                                 result_folder,
                                 evaluation_callback,
                                 checkpoint=True,
                                 early_stop=True,
                                 metrics=metrics)

    warmup_epochs = 10
    warmup_lr = None
    if args['lr_schedule']:
        total_train_steps = args['epochs'] * train_steps
        warmup_steps = warmup_epochs * train_steps
        warmup_lr = WarmUpCosineDecayScheduler(learning_rate_base=args['lr_base'],
                                               total_steps=total_train_steps,
                                               warmup_learning_rate=0.0,
                                               warmup_steps=warmup_steps,
                                               hold_base_rate_steps=30 * train_steps,
                                               verbose=1)
        callbacks.append(warmup_lr)

    # Define optimizer and train
    model_history = model.fit(x=train_generator,
                              epochs=args['epochs'],
                              steps_per_epoch=train_steps,
                              validation_data=validation_generator,
                              validation_steps=validation_steps,
                              verbose=1,
                              shuffle=False,
                              callbacks=callbacks,
                              initial_epoch=args['initial_epoch'],
                              )

    # NOTE(review): callbacks[3] is presumably the checkpoint callback returned
    # by create_callbacks -- confirm against that helper.
    best_weights_file = identify_best_weights(result_folder, model_history.history, callbacks[3].best)
    remove_non_best_weights(result_folder, [best_weights_file])

    # do fine-tuning
    if args['do_finetune'] and best_weights_file:
        print('Finetune...')
        # Drop only the first-stage LR scheduler; when no scheduler was added,
        # the callback list must stay untouched.
        if warmup_lr is not None:
            callbacks.remove(warmup_lr)
        model.load_weights(best_weights_file)
        finetune_lr = 1e-6
        if args['lr_schedule']:
            warmup_lr_finetune = WarmUpCosineDecayScheduler(learning_rate_base=finetune_lr,
                                                            total_steps=total_train_steps,
                                                            warmup_learning_rate=0.0,
                                                            warmup_steps=warmup_steps,
                                                            hold_base_rate_steps=10 * train_steps,
                                                            verbose=1)
            callbacks.append(warmup_lr_finetune)
        finetune_optimizer = Adam(finetune_lr)
        model.compile(loss=loss, optimizer=finetune_optimizer, metrics=[metrics])

        finetune_model_history = model.fit(x=train_generator,
                                           epochs=args['epochs'],
                                           steps_per_epoch=train_steps,
                                           validation_data=validation_generator,
                                           validation_steps=validation_steps,
                                           verbose=1,
                                           shuffle=False,
                                           callbacks=callbacks,
                                           initial_epoch=args['initial_epoch'],
                                           )

        best_weights_file_finetune = identify_best_weights(result_folder, finetune_model_history.history,
                                                           callbacks[3].best)
        remove_non_best_weights(result_folder, [best_weights_file, best_weights_file_finetune])
# Training options passed to train_main, collected in a single dict literal.
args = {
    'multi_gpu': 0,
    'gpu': 0,
    'result_folder': '/coda/02_vqa/triq-icme-train-code/src/result/',
    'n_quality_levels': 1,
    'backbone': 'resnet50',
    'train_folders': ['/data/02_VQA/triq-icme-dataset-tmp/train'],
    'val_folders': ['/data/02_VQA/triq-icme-dataset-tmp/val'],
    'koniq_mos_file': '/coda/02_vqa/triq-icme-train-code/src/databases/triq_icme_mos_file_tmp.csv',
    'live_mos_file': '/coda/02_vqa/triq-icme-train-code/src/databases/live_mos.csv',
    'initial_epoch': 0,
    'lr_base': 1e-4 / 2,
    'lr_schedule': True,
    'batch_size': 16,
    'epochs': 120,
    'image_aug': True,
    # Alternative (VGG16) weights file, currently unused:
    # r'.\pretrained_weights\vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
    'weights': '/coda/02_vqa/triq-icme-train-code/src/pretrained_weights/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
    'do_finetune': False,
}

train_main(args)
My MOS file looks like this:
UGC0001_720x1280_30_crf_00_1.jpg,4.6552
UGC0001_720x1280_30_crf_17_1.jpg,4.3793
UGC0001_720x1280_30_crf_22_1.jpg,4.5172
UGC0001_720x1280_30_crf_32_5.jpg,3.4828
UGC0003_720x1280_30_crf_42_24.jpg,1.5556
.....
Hi, I just made a commit to evaluation_callback_generator.py, that should fix your problem.
yes, it's ok, thank you very much