db: avoid loading skiplist size during concurrent apply
CheranMahalingam opened this issue · comments
Currently, during the batch commit process, we first attempt to reserve space in the current memtable through `memTable.prepare`. Since we don't know the exact amount of space the batch will consume when written to the skiplist that backs the memtable, we reserve space using an upper bound. However, over time the amount of space reserved will drift away from the actual skiplist size. We combat this by resetting the reserved value to the skiplist size. To do this safely, we use the `writerRefs` count to ensure that there are no concurrent apply operations. However, `memTable.prepare` increments `writerRefs` only after reserving space, which results in a potential race between `memTable.prepare` and `memTable.apply`.
// prepare reserves room in the memtable for batch and then takes a writer
// reference. It returns arenaskl.ErrArenaFull when batch.memTableSize exceeds
// the space reported by availBytes.
//
// Ordering matters here: availBytes is consulted and the reservation is
// recorded before writerRef is called.
func (m *memTable) prepare(batch *Batch) error {
	// If there are no concurrent apply operations or ingests, writerRefs is 0
	if free := m.availBytes(); batch.memTableSize > uint64(free) {
		return arenaskl.ErrArenaFull
	}
	m.reserved += uint32(batch.memTableSize)
	m.writerRef()
	return nil
}
...
// apply reads the entries of batch and writes them into the memtable's
// skiplist, tagging each entry with a sequence number that starts at seqNum
// and increases by one per entry. In the portion shown, any error from the
// batch reader or from an insertion is returned immediately. Parts of the
// body are elided ("...") in this excerpt.
func (m *memTable) apply(batch *Batch, seqNum uint64) error {
...
var ins arenaskl.Inserter
...
// seqNum advances once for every entry pulled from the batch reader.
for r := batch.Reader(); ; seqNum++ {
kind, ukey, value, ok, err := r.Next()
if !ok {
// No further entry was produced: report a reader error if there is one,
// otherwise leave the loop.
if err != nil {
return err
}
break
}
ikey := base.MakeInternalKey(ukey, seqNum, kind)
switch kind {
...
default:
// Kinds not matched by the elided cases are added to m.skl through the
// inserter.
err = ins.Add(&m.skl, ikey, value)
}
if err != nil {
return err
}
}
...
}
...
// availBytes reports how many bytes of the arena are not yet reserved, i.e.
// the arena capacity minus m.reserved. As a side effect it may first reset
// m.reserved to the arena's current size (see below).
func (m *memTable) availBytes() uint32 {
	arena := m.skl.Arena()
	// With no other concurrent apply operations in flight, the reservation
	// can be replaced by the number of bytes that have actually been
	// allocated, discarding the over-estimation present in
	// memTableEntrySize.
	if m.writerRefs.Load() == 1 {
		m.reserved = arena.Size()
	}
	return arena.Capacity() - m.reserved
}
The next step here is to write a unit test that reproduces the race.
Relevant slack thread with more context: https://cockroachlabs.slack.com/archives/CAC6K3SLU/p1713807762229579