One question: how do I do incremental learning when training with Drain3?
CH-nolyn opened this issue · comments
CH-nolyn commented
`import json
import logging
import sys
import time
from util.config_reader import initialize_template_config
from util.httpserver_operation import training_post_model
from drain3.file_persistence import FilePersistence
from drain3 import TemplateMiner
def process_log_training(raw_log_path, query_data):
    """Train a Drain3 template miner on every line of a raw log file.

    Each line of ``raw_log_path`` is right-stripped and passed to
    ``TemplateMiner.add_log_message``. Progress is logged every 10000
    lines, and every line whose result reports a change is logged together
    with the cluster id and mined template. After the file is consumed the
    clusters (largest first), the prefix tree and the profiler report are
    emitted, and the state file path is handed to ``training_post_model``.

    NOTE(review): the miner is built on a ``FilePersistence`` pointing at
    ``<scenario>/drain3_state.bin``. Drain3 documents that a TemplateMiner
    restores a previously saved snapshot from its persistence handler on
    construction, so calling this function again with the same scenario and
    a new log file should continue training the existing model rather than
    start from scratch -- confirm against the drain3 version in use.

    Args:
        raw_log_path: Path of the UTF-8 text log file to train on.
        query_data: Mapping that must contain the key ``"scenario"``; its
            value is used as the directory part of the state file path.

    Returns:
        None.

    Raises:
        KeyError: If ``query_data`` has no ``"scenario"`` entry.
        OSError: If ``raw_log_path`` cannot be opened.
    """
    # ``__name__`` (not the undefined bare ``name``) keys the logger to this module.
    logger = logging.getLogger(__name__)
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

    scenario = query_data["scenario"]
    output_file = f"{scenario}/drain3_state.bin"
    persistence = FilePersistence(output_file)
    template_miner = TemplateMiner(persistence, config=initialize_template_config(profiling_enabled=True))

    line_count = 0
    batch_size = 10000

    with open(raw_log_path, encoding='utf-8') as f:
        start_time = time.time()
        batch_start_time = start_time

        # Train line by line, streaming from the file handle so that a large
        # log never has to be held in memory as a whole.
        for line_count, line in enumerate(f, start=1):
            line = line.rstrip()
            result = template_miner.add_log_message(line)

            if line_count % batch_size == 0:
                time_took = time.time() - batch_start_time
                # A coarse clock can report zero elapsed time; avoid dividing by it.
                rate = batch_size / time_took if time_took > 0 else float("inf")
                logger.info(f"Processing line: {line_count}, rate {rate:.1f} lines/sec, "
                            f"{len(template_miner.drain.clusters)} clusters so far.")
                batch_start_time = time.time()

            # Only report lines that created or altered a cluster template.
            if result["change_type"] != "none":
                result_json = json.dumps({
                    result["cluster_id"]: {
                        "template_mined": result["template_mined"]
                    }
                })
                logger.info(f"Input ({line_count}): " + line)
                logger.info("Result: " + result_json)

    time_took = time.time() - start_time
    # Same zero-elapsed guard as above (e.g. empty or very small input).
    rate = line_count / time_took if time_took > 0 else float("inf")
    logger.info(
        f"--- Done processing file in {time_took:.2f} sec. Total of {line_count} lines, rate {rate:.1f} lines/sec, "
        f"{len(template_miner.drain.clusters)} clusters")

    # List clusters from the most to the least populated.
    sorted_clusters = sorted(template_miner.drain.clusters, key=lambda it: it.size, reverse=True)
    for cluster in sorted_clusters:
        logger.info(cluster)

    print("Prefix Tree:")
    template_miner.drain.print_tree()

    template_miner.profiler.report(0)

    training_post_model(output_file)
`
This is my training code. How can I train on new logs starting from a previously trained model?