magic-research / PLLaVA

Official repository for the paper PLLaVA

Evaluation bug

xumingze0308 opened this issue · comments

Hi,

When I evaluate the model on videoqabench, the model doesn't generate any answer, only the prompt, here. The bug is very similar to this closed issue, but I still have it after pulling your new commits. Can you please take a look at it? Thanks!

Hi,

I found the error: I had set the wrong model_dir. To double-confirm with you: in eval.sh, I should set model_dir to the folder of the pretrained llava-v1.6 and weight_dir to my own fine-tuned folder (the LoRA weights), am I right?

Another question: the evaluation gives the warning UserWarning: do_sample is set to False. However, top_p is set to 0.9 -- this flag is only used in sample-based generation modes. You should set do_sample=True or unset top_p. This is normal and should still reproduce your results, right?

Thank you!

Hi,
For the first question, directly passing in MODELS/pllava-7b should be fine, as long as you downloaded it from Hugging Face. The demo and the evaluation share a loading function, shown here:

def load_pllava(repo_id, num_frames, use_lora=False, weight_dir=None, lora_alpha=32, use_multi_gpus=False, pooling_shape=(16,12,12)):
    kwargs = {
        'num_frames': num_frames,
    }
    # print("===============>pooling_shape", pooling_shape)
    if num_frames == 0:
        kwargs.update(pooling_shape=(0,12,12)) # produce a bug if ever usen the pooling projector
    config = PllavaConfig.from_pretrained(
        repo_id if not use_lora else weight_dir,
        pooling_shape=pooling_shape,
        **kwargs,
    )
    with torch.no_grad():
        model = PllavaForConditionalGeneration.from_pretrained(repo_id, config=config, torch_dtype=torch.bfloat16)
    try:
        processor = PllavaProcessor.from_pretrained(repo_id)
    except Exception as e:
        processor = PllavaProcessor.from_pretrained('llava-hf/llava-1.5-7b-hf')
    # config lora
    if use_lora and weight_dir is not None:
        print("Use lora")
        peft_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM, inference_mode=False, target_modules=["q_proj", "v_proj"],
            r=128, lora_alpha=lora_alpha, lora_dropout=0.
        )
        print("Lora Scaling:", lora_alpha/128)
        model.language_model = get_peft_model(model.language_model, peft_config)
        assert weight_dir is not None, "pass a folder to your lora weight"
        print("Finish use lora")
    # load weights
    if weight_dir is not None:
        state_dict = {}
        save_fnames = os.listdir(weight_dir)
        if "model.safetensors" in save_fnames:
            use_full = False
            for fn in save_fnames:
                if fn.startswith('model-0'):
                    use_full = True
                    break
        else:
            use_full = True
        if not use_full:
            print("Loading weight from", weight_dir, "model.safetensors")
            with safe_open(f"{weight_dir}/model.safetensors", framework="pt", device="cpu") as f:
                for k in f.keys():
                    state_dict[k] = f.get_tensor(k)
        else:
            print("Loading weight from", weight_dir)
            for fn in save_fnames:
                if fn.startswith('model-0'):
                    with safe_open(f"{weight_dir}/{fn}", framework="pt", device="cpu") as f:
                        for k in f.keys():
                            state_dict[k] = f.get_tensor(k)
        if 'model' in state_dict.keys():
            msg = model.load_state_dict(state_dict['model'], strict=False)
        else:
            msg = model.load_state_dict(state_dict, strict=False)
        print(msg)
    # dispatch model weight
    if use_multi_gpus:
        max_memory = get_balanced_memory(
            model,
            max_memory=None,
            no_split_module_classes=["LlamaDecoderLayer"],
            dtype='bfloat16',
            low_zero=False,
        )
        device_map = infer_auto_device_map(
            model,
            max_memory=max_memory,
            no_split_module_classes=["LlamaDecoderLayer"],
            dtype='bfloat16'
        )
        dispatch_model(model, device_map=device_map)
        print(model.hf_device_map)
    model = model.eval()
    return model, processor

So the weights could be loaded from two sources:

  1. model_dir: this should contain weights named as in the original transformers Pllava model.
  2. weight_dir: this is loaded after constructing the PeftModel, so its weights should be named as in the PeftModel.

I think as long as the weights are loaded from one of the two sources above, it should be fine. When loading from the downloaded weights, you should see "" in the terminal.
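For reference, here is a minimal usage sketch of load_pllava for the LoRA evaluation case. The paths, num_frames, and lora_alpha values are placeholders, not the exact settings of the repo's eval scripts, and it assumes the imports used by the function above are available.

# Hypothetical sketch: load the pretrained PLLaVA backbone (source 1) and then
# overlay LoRA weights from a fine-tuned checkpoint folder (source 2).
model, processor = load_pllava(
    repo_id="MODELS/pllava-7b",           # folder downloaded from Hugging Face
    num_frames=16,
    use_lora=True,
    weight_dir="path/to/your/lora/ckpt",  # fine-tuned folder with PeftModel-named weights
    lora_alpha=4,                         # evaluation value discussed below
)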

For the second question, we also see that UserWarning in our own evaluation, so it is safe to ignore so far.
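If you want to silence the warning, one option (not something the repo does, just a generic transformers pattern) is to pass an explicit greedy-decoding generation config so top_p stays at its default; model and inputs below are placeholders.

from transformers import GenerationConfig

# Sketch only: an explicit greedy-decoding config, so transformers does not warn
# about top_p being set while do_sample=False. The output is unchanged, since
# top_p is ignored anyway when sampling is off.
gen_cfg = GenerationConfig(do_sample=False, max_new_tokens=256)
output_ids = model.generate(**inputs, generation_config=gen_cfg)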

BTW, I just fixed a response post-processing bug at fd9194a, so you might consider keeping up with the newest code. The former code could leave a leading space in the answer, and the ChatGPT-based evaluation seems sensitive to that leading space in the response (VCG score 3.10 vs. 3.03 for pllava-7b with lora_alpha set to 4).
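To illustrate the kind of issue (this is just an illustration, not the actual diff in fd9194a): if the decoded answer keeps a leading space, the scorer sees " answer" instead of "answer", so stripping whitespace during post-processing avoids it.

# Illustration only -- strip stray whitespace from the decoded answer before it
# is sent to the ChatGPT-based scorer. The example string is made up.
raw_answer = " A man is playing a guitar."   # note the leading space
clean_answer = raw_answer.strip()
print(repr(clean_answer))                    # 'A man is playing a guitar.'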

Thank you very much for this clarification! I have another question about the LoRA alpha. I found that the default training config uses lora_alpha=32 but the evaluation uses 4 instead. After I changed the evaluation's lora_alpha to 32, the performance dropped a lot (e.g., MSVD ~77% -> ~73%). Did you observe the same thing?

commented

I noticed the same thing: the lora_alpha is not consistent between the training and inference stages. As shown in Fig. 9, the authors report that using a lower alpha at test time achieves better performance.
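For intuition, the scaling printed by load_pllava above is lora_alpha / r with r=128, so changing the evaluation alpha directly rescales how strongly the LoRA update is applied. A quick sketch of the arithmetic:

# The LoRA update is scaled by lora_alpha / r (r=128 in load_pllava above),
# so a smaller evaluation alpha down-weights the fine-tuned LoRA contribution.
r = 128
for lora_alpha in (32, 4):
    print(f"lora_alpha={lora_alpha:>2} -> scaling = {lora_alpha / r}")
# lora_alpha=32 -> scaling = 0.25      (training default)
# lora_alpha= 4 -> scaling = 0.03125   (evaluation default)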