[Bug] Repeated include statements within the same source.cu/source.cc file
BolinSNLHM opened this issue · comments
Describe the bug
While working on benchmarking Hidet for CPU support, I noticed that during the tuning process, within a single source.cc/source.cu file, the block of #include statements:
#include <stdint.h>
#include <hidet/runtime/symbols.h>
#include <hidet/runtime/memory_planner.h>
#include <hidet/runtime/cpu/context.h>
#include <hidet/runtime/cuda/complex.h>
#include <hidet/runtime/cuda/context.h>
#include <hidet/runtime/logging.h>
will appear multiple times, once before each candidate_xx
namespace.
You can refer to the attached source.cu
file as an example.
To Reproduce
Script:
import hidet
from typing import List
import hidet.testing

# Cache the compiled artifacts under a local directory so the generated
# source.cu/source.cc files can be inspected after the run.
hidet.option.cache_dir('./mycache-gpt2')
# Search space 2 enables the tuning process that emits the per-candidate sources
# exhibiting the duplicated #include blocks described above.
hidet.option.search_space(2)
def generate_hidet(model, text, input_ids, position_ids, past_keys, past_values, device, tokens_to_generate=10):
    """Autoregressively run *model* and collect the generated token ids.

    Each step feeds the model's outputs back in as the next step's inputs and
    records ``input_ids[0].item()``.  Returns the list of generated ids.
    NOTE(review): *text* and *device* are accepted but never read here —
    presumably kept for signature parity with other benchmark drivers.
    """
    generated = []
    remaining = tokens_to_generate
    while remaining > 0:
        input_ids, position_ids, past_keys, past_values = model(
            input_ids, position_ids, past_keys, past_values
        )
        generated.append(input_ids[0].item())
        remaining -= 1
    return generated
# Build the symbolic GPT-2 module and move it to the GPU.
gpt2_module = hidet.testing.models.gpt2.model(disable_cache=True)
gpt2_module.cuda()

# Symbolic inputs: token ids / positions over a dynamic sequence length, plus
# KV caches with a dynamic previous-sequence dimension.
input_ids = hidet.symbol(['seq_length'], dtype=hidet.int32, device='cuda')
position_ids = hidet.symbol(['seq_length'], dtype=hidet.int32, device='cuda')
cache_shape = [gpt2_module.num_hidden_layers, gpt2_module.num_heads, 'prev_seq_length', gpt2_module.head_dim]
past_keys = hidet.symbol(cache_shape, dtype=hidet.float32, device='cuda')
past_values = hidet.symbol(cache_shape, dtype=hidet.float32, device='cuda')

# Trace, optimize, and compile the flow graph (space=2 triggers tuning).
outputs = gpt2_module(input_ids, position_ids, past_keys, past_values)
graph = hidet.trace_from(outputs, inputs=[input_ids, position_ids, past_keys, past_values])
graph = hidet.graph.optimize(graph)
compiled_model = graph.build(space=2)
compiled_model.save('./benchmark_outs2/compiled.hidet')

# Concrete inputs for the benchmark run.
text = "This is just an example..."
hidet_tokenizer = hidet.testing.models.gpt2.tokenizer()
hidet_input_ids_list: List[int] = hidet_tokenizer(text)['input_ids']
hidet_input_ids = hidet.asarray(hidet_input_ids_list, dtype=hidet.int32, device='cuda')
hidet_position_ids = hidet.arange(hidet_input_ids.shape[0], dtype=hidet.int32, device='cuda')

# Shape of an empty (zero-length) KV cache.
empty_cache_shape = [gpt2_module.num_hidden_layers, gpt2_module.num_heads, 0, gpt2_module.head_dim]

def _fresh_empty_cache():
    # Allocate a brand-new empty cache tensor; each generation run must start
    # from its own tensor rather than sharing one across calls.
    return hidet.zeros(empty_cache_shape, dtype=hidet.float32, device='cuda')

hidet_past_keys = _fresh_empty_cache()
hidet_past_values = _fresh_empty_cache()

hidet_latency = hidet.utils.benchmark_func(
    lambda: generate_hidet(
        compiled_model,
        text,
        hidet_input_ids,
        hidet_position_ids,
        _fresh_empty_cache(),
        _fresh_empty_cache(),
        'cuda',
        tokens_to_generate=40,
    ),
    repeat=1,
)