groot: long-running baskets crossing small/big file threshold are incorrectly handled
sbinet opened this issue · comments
long running baskets, ie: baskets that are created when a TFile is "small" (<2Gb) but are flushed when TFile is "big", are incorrectly handled.
namely, the TKey size difference stemming from the 32b -> 64b
offsets introduces inconsistencies, leading to a crash in root-dump
and this kind of message in ROOT/C++:
Warning in <TBasket::ReadBasketBuffers>: basket:d_lep_pt has fNevBuf=10000000 but fEntryOffset=0, pos=2766185783, len=40000084, fNbytes=40000084, fObjlen=40000008, trying to repair
simple program to reproduce:
// +build ignore
package main
import (
"log"
"math/rand"
"go-hep.org/x/hep/groot"
"go-hep.org/x/hep/groot/riofs"
"go-hep.org/x/hep/groot/rtree"
)
// main writes N (1e7) entries into a tree named "truth" inside "o.root".
// The file is created with riofs.WithoutCompression(). Each entry carries
// two scalars, one always-empty float32 slice and five float64 slices of
// identical, randomly drawn length.
func main() {
f, err := groot.Create("o.root", riofs.WithoutCompression())
if err != nil {
log.Fatalf("error: %+v", err)
}
defer f.Close()
// Event layout. The groot struct tags give the branch names; a tag of the
// form "name[count]" ties a slice to the field holding its length.
var evt struct {
Run int32 `groot:"runNumber"`
Evt int64 `groot:"eventNumber"`
NLep int32 `groot:"d_nlep"`
LepPt []float32 `groot:"d_lep_pt[d_nlep]"`
NLep0 int32 `groot:"nlep0"`
LepPt0 []float64 `groot:"lep_pt0[nlep0]"`
NLep1 int32 `groot:"nlep1"`
LepPt1 []float64 `groot:"lep_pt1[nlep1]"`
NLep2 int32 `groot:"nlep2"`
LepPt2 []float64 `groot:"lep_pt2[nlep2]"`
NLep3 int32 `groot:"nlep3"`
LepPt3 []float64 `groot:"lep_pt3[nlep3]"`
NLep4 int32 `groot:"nlep4"`
LepPt4 []float64 `groot:"lep_pt4[nlep4]"`
}
// Pre-allocate the slices once (capacity 20); the loop below only
// re-slices them to length 0 and appends at most 10 values per event.
evt.LepPt = make([]float32, 0, 20)
evt.LepPt0 = make([]float64, 0, 20)
evt.LepPt1 = make([]float64, 0, 20)
evt.LepPt2 = make([]float64, 0, 20)
evt.LepPt3 = make([]float64, 0, 20)
evt.LepPt4 = make([]float64, 0, 20)
wvars := rtree.WriteVarsFromStruct(&evt)
w, err := rtree.NewWriter(f, "truth", wvars)
if err != nil {
log.Fatalf("error: %+v", err)
}
defer w.Close()
// Fixed seed: every run draws the same sequence of random values.
rnd := rand.New(rand.NewSource(1234))
var N int = 1e7
for i := 0; i < N; i++ {
// Progress message every N/10 events; the N > 10 guard keeps N/10 non-zero.
if N > 10 && i%(N/10) == 0 {
log.Printf("evt: %d...", i)
}
evt.Run = int32(i)
evt.Evt = int64(i)
// d_nlep is always 0, so d_lep_pt stays empty for every entry.
evt.NLep = 0
// All five float64 slices share one length, drawn from [1, 10].
evt.NLep0 = int32(rnd.Intn(10) + 1)
evt.NLep1 = evt.NLep0
evt.NLep2 = evt.NLep0
evt.NLep3 = evt.NLep0
evt.NLep4 = evt.NLep0
// Reset lengths while keeping the backing arrays.
evt.LepPt = evt.LepPt[:0]
evt.LepPt0 = evt.LepPt0[:0]
evt.LepPt1 = evt.LepPt1[:0]
evt.LepPt2 = evt.LepPt2[:0]
evt.LepPt3 = evt.LepPt3[:0]
evt.LepPt4 = evt.LepPt4[:0]
for j := 0; j < int(evt.NLep0); j++ {
// Values are 5*NormFloat64()+30, i.e. mean 30 and standard deviation 5.
evt.LepPt0 = append(evt.LepPt0, 5*rnd.NormFloat64()+30)
evt.LepPt1 = append(evt.LepPt1, 5*rnd.NormFloat64()+30)
evt.LepPt2 = append(evt.LepPt2, 5*rnd.NormFloat64()+30)
evt.LepPt3 = append(evt.LepPt3, 5*rnd.NormFloat64()+30)
evt.LepPt4 = append(evt.LepPt4, 5*rnd.NormFloat64()+30)
}
// Disabled by the trailing "&& false": would otherwise put two values into
// d_lep_pt on every N/10-th event.
if N > 10 && i%(N/10) == 0 && false {
evt.NLep = 2
evt.LepPt = []float32{1, 2}
}
_, err = w.Write()
if err != nil {
log.Fatalf("could not write evt=%d: %+v", i, err)
}
}
// Close explicitly so that errors from Close are reported.
// NOTE(review): w and f are also closed by the deferred calls above, so each
// Close runs twice on the success path; presumably the second call is
// harmless — confirm.
err = w.Close()
if err != nil {
log.Fatalf("error: %+v", err)
}
err = f.Close()
if err != nil {
log.Fatalf("error: %+v", err)
}
}
and the C++/ROOT program to read back:
// +build ignore
#include "TTree.h"
#include "TFile.h"
#include <iostream>
int main(int argc, char **argv) {
auto f = TFile::Open(argv[1]);
auto t = (TTree*)f->Get("truth");
Long64_t evtNbr = 0;
Int_t runNbr = 0;
Int_t d_nlep = 0;
float d_lep_pt[20];
Int_t d_nlep0 = 0;
double d_lep_pt0[20];
t->SetBranchStatus("*", 1);
t->SetBranchAddress("runNumber", &runNbr);
t->SetBranchAddress("eventNumber", &evtNbr);
t->SetBranchAddress("d_nlep", &d_nlep);
t->SetBranchAddress("d_lep_pt", d_lep_pt);
t->SetBranchAddress("nlep0", &d_nlep0);
t->SetBranchAddress("lep_pt0", d_lep_pt0);
std::cout << "entries: " << t->GetEntries() << "\n";
Long_t n = 0;
Long_t nlep = 0;
Long_t nlep0 = 0;
for (int i = 0; i < t->GetEntries(); i++) {
t->GetEntry(i);
if (i%(t->GetEntries()/10) == 0) {
std::cout << "entry: " << i << ", run=" << runNbr << "\n";
}
n++;
nlep += d_nlep;
nlep0 += d_nlep0;
}
std::cout << "n-leps: " << nlep << "\n";
std::cout << "n-leps0: " << nlep0 << "\n";
std::cout << "entries: " << n << " read\n";
return 0;
}
@rmadar this might be of interest for you as well.