go-hep / hep

hep is the mono repository holding all of go-hep.org/x/hep packages and tools

Home Page:https://go-hep.org

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

groot: long-running baskets crossing small/big file threshold are incorrectly handled

sbinet opened this issue · comments

Long-running baskets, i.e. baskets that are created while a TFile is still "small" (<2GB) but are flushed once the TFile has become "big", are incorrectly handled.

Namely, the TKey size difference stemming from the switch from 32-bit to 64-bit offsets introduces inconsistencies, leading to a crash in root-dump and to this kind of message in ROOT/C++:

Warning in <TBasket::ReadBasketBuffers>: basket:d_lep_pt has fNevBuf=10000000 but fEntryOffset=0, pos=2766185783, len=40000084, fNbytes=40000084, fObjlen=40000008, trying to repair

simple program to reproduce:

// +build ignore

package main

import (
	"log"
	"math/rand"

	"go-hep.org/x/hep/groot"
	"go-hep.org/x/hep/groot/riofs"
	"go-hep.org/x/hep/groot/rtree"
)

// main creates an uncompressed "o.root" file and fills a tree named "truth"
// with N events, each carrying two scalar identifiers and six
// variable-length leaves (one float32 slice that stays empty and five
// float64 slices filled with pseudo-random values).
func main() {
	out, err := groot.Create("o.root", riofs.WithoutCompression())
	if err != nil {
		log.Fatalf("error: %+v", err)
	}
	defer out.Close()

	// The struct tags give the branch names; "name[count]" ties a slice
	// to the field holding its length.
	var evt struct {
		Run    int32     `groot:"runNumber"`
		Evt    int64     `groot:"eventNumber"`
		NLep   int32     `groot:"d_nlep"`
		LepPt  []float32 `groot:"d_lep_pt[d_nlep]"`
		NLep0  int32     `groot:"nlep0"`
		LepPt0 []float64 `groot:"lep_pt0[nlep0]"`
		NLep1  int32     `groot:"nlep1"`
		LepPt1 []float64 `groot:"lep_pt1[nlep1]"`
		NLep2  int32     `groot:"nlep2"`
		LepPt2 []float64 `groot:"lep_pt2[nlep2]"`
		NLep3  int32     `groot:"nlep3"`
		LepPt3 []float64 `groot:"lep_pt3[nlep3]"`
		NLep4  int32     `groot:"nlep4"`
		LepPt4 []float64 `groot:"lep_pt4[nlep4]"`
	}

	// The five float64 columns are always handled together and in this
	// order, so keep pointers to them for the per-event reset and fill.
	columns := []*[]float64{
		&evt.LepPt0, &evt.LepPt1, &evt.LepPt2, &evt.LepPt3, &evt.LepPt4,
	}

	// Pre-allocate room for 20 elements per slice.
	evt.LepPt = make([]float32, 0, 20)
	for _, col := range columns {
		*col = make([]float64, 0, 20)
	}

	tree, err := rtree.NewWriter(out, "truth", rtree.WriteVarsFromStruct(&evt))
	if err != nil {
		log.Fatalf("error: %+v", err)
	}
	defer tree.Close()

	// Fixed seed: the generated content is the same on every run.
	rnd := rand.New(rand.NewSource(1234))

	N := int(1e7)
	for i := 0; i < N; i++ {
		// True on every tenth of the run; the N > 10 guard short-circuits
		// before the modulo can see a zero divisor.
		milestone := N > 10 && i%(N/10) == 0
		if milestone {
			log.Printf("evt: %d...", i)
		}

		evt.Run = int32(i)
		evt.Evt = int64(i)
		evt.NLep = 0

		// One multiplicity in [1, 10], shared by all five float64 columns.
		nlep := int32(rnd.Intn(10) + 1)
		evt.NLep0 = nlep
		evt.NLep1 = nlep
		evt.NLep2 = nlep
		evt.NLep3 = nlep
		evt.NLep4 = nlep

		// Truncate every slice to length zero, keeping its capacity.
		evt.LepPt = evt.LepPt[:0]
		for _, col := range columns {
			*col = (*col)[:0]
		}

		// Draw the values row by row (column 0 to 4 for each j) so the
		// sequence of random draws is column-interleaved.
		for j := 0; j < int(nlep); j++ {
			for _, col := range columns {
				*col = append(*col, 5*rnd.NormFloat64()+30)
			}
		}

		// Deliberately disabled branch: it would populate d_lep_pt on
		// milestone events.
		if milestone && false {
			evt.NLep = 2
			evt.LepPt = []float32{1, 2}
		}

		if _, err := tree.Write(); err != nil {
			log.Fatalf("could not write evt=%d: %+v", i, err)
		}
	}

	// Explicit closes so that their errors are reported.
	if err := tree.Close(); err != nil {
		log.Fatalf("error: %+v", err)
	}

	if err := out.Close(); err != nil {
		log.Fatalf("error: %+v", err)
	}
}

and the C++/ROOT program to read back:

// +build ignore

#include "TTree.h"
#include "TFile.h"

#include <iostream>

int main(int argc, char **argv) {
	auto f = TFile::Open(argv[1]);
	auto t = (TTree*)f->Get("truth");

	Long64_t evtNbr = 0;
	Int_t  runNbr = 0;

	Int_t d_nlep = 0;
	float d_lep_pt[20];

	Int_t d_nlep0 = 0;
	double d_lep_pt0[20];

	t->SetBranchStatus("*", 1);
	t->SetBranchAddress("runNumber", &runNbr);
	t->SetBranchAddress("eventNumber", &evtNbr);

	t->SetBranchAddress("d_nlep", &d_nlep);
	t->SetBranchAddress("d_lep_pt", d_lep_pt);

	t->SetBranchAddress("nlep0", &d_nlep0);
	t->SetBranchAddress("lep_pt0", d_lep_pt0);

	std::cout << "entries: " << t->GetEntries() << "\n";
	Long_t n = 0;
	Long_t nlep = 0;
	Long_t nlep0 = 0;
	for (int i = 0; i < t->GetEntries(); i++) {
		t->GetEntry(i);
		if (i%(t->GetEntries()/10) == 0) {
			std::cout << "entry: " << i << ", run=" << runNbr << "\n";
		}
	n++;
	nlep += d_nlep;
	nlep0 += d_nlep0;
	}
	std::cout << "n-leps:  " << nlep << "\n";
	std::cout << "n-leps0: " << nlep0 << "\n";
	std::cout << "entries: " << n << " read\n";

	return 0;
}

@rmadar this might be of interest for you as well.