mbdavid / LiteDB

LiteDB - A .NET NoSQL Document Store in a single data file

Home Page:http://www.litedb.org

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

[BUG]OrderBy not using index

mirror222 opened this issue · comments

LiteDB 5.0.18
Net 8.0/6.0

I have a large table with over 500,000 records, and I have created indexes on the LabelIds and InternalDate fields. I am now encountering a performance issue, as shown in the following code:

internal class CThreads
{
        [BsonId]
        public int UID { get; set; }       
        public List<string> LabelIds { get; set; } = new List<string>();
        public string From { get; set; }
        public string Subject { get; set; }
        public string Snippet { get; set; }
        public long InternalDate { get; set; }
        public int IsDeleted { get; set; }
}
var connStr = new ConnectionString(@"filename='test5.db9'");

using (var _conn = new LiteDatabase(connStr))
{

    var _dbThreads = _conn.GetCollection<CThreads>();
    _dbThreads.EnsureIndex("date_index", x => x.InternalDate);
    _dbThreads.EnsureIndex("label_index", x => x.LabelIds);
    _dbThreads.EnsureIndex("subject_index", x => x.Subject);

    _conn.BeginTrans();
    var items = new List<CThreads>();
    for (var i = 100000; i < 600000; i++)
    {
        var item = new CThreads()
        {
            LabelIds = new List<string> { "NOTICE" },
            From = "Customer " + i.ToString(),
            Subject = "Subject " + i.ToString(),
            Snippet = "Snippet data " + i.ToString(),
            InternalDate = 1708300201000 + i * 10,
            IsDeleted = 0
        };
        items.Add(item);
        if (i % 50000 == 0)
        {
            _dbThreads.InsertBulk(items);
            items.Clear();
        }
    }
    _conn.Commit();

    //Where only test
    var watch = Stopwatch.StartNew();
    watch.Start();
    var query = _dbThreads.Query().Where(x => x.LabelIds.Contains("NOTICE"));               
    var results = query.Skip(1000).Limit(15).ToList();                 
    watch.Stop();
    Console.WriteLine($"Where ONLY : {watch.ElapsedMilliseconds} ms "); 

    //OrderBy only test
    watch.Restart();
    query = _dbThreads.Query();
    query = query.OrderByDescending(x => x.InternalDate);
    results = query.Skip(1000).Limit(15).ToList();
    watch.Stop();
    Console.WriteLine($"OrderBy ONLY: {watch.ElapsedMilliseconds} ms "); 

    //Where and OrderBy test
    watch.Restart();
    query = _dbThreads.Query();
    query = query.Where(x => x.LabelIds.Contains("NOTICE")).OrderByDescending(x => x.InternalDate);
    results = query.Skip(1000).Limit(15).ToList();
    watch.Stop();
    Console.WriteLine($"Both Where and OrderBy : {watch.ElapsedMilliseconds} ms "); 

}

The execution results are shown in the following figure:

image

I guess that LiteDB first uses the index to perform the conditional query, and then sorts (FileSort)?

Hi @mirror222

I guess that LiteDB first uses the index to perform the conditional query, and then sorts (FileSort)?

That's it. You can see explain plan in your query using EXPLAIN (in sql) or using GetPlan()

var queryPlan = _dbThreads.Query().Where(x => x.LabelIds.Contains("NOTICE")).GetPlan();
Print(queryPlan);

void Print(BsonDocument doc)
{
    var sb = new StringBuilder();
    var sw = new StringWriter(sb);
    var w = new JsonWriter(sw);
    w.Pretty = true;
    w.Serialize(doc);
    Console.WriteLine(sb);
}
{
    "collection": "CThreads",
    "snaphost": "read",
    "pipe": "queryPipe",
    "index":
    {
        "name": "label_index",
        "expr": "$.LabelIds[*]",
        "order": 1,
        "mode": "INDEX SEEK(label_index = \"NOTICE\")",
        "cost": 10
    },
    "lookup":
    {
        "loader": "document",
        "fields": "$"
    },
    "select":
    {
        "expr": "$",
        "all": false
    }
}
Where ONLY : 26 ms
{
    "collection": "CThreads",
    "snaphost": "read",
    "pipe": "queryPipe",
    "index":
    {
        "name": "date_index",
        "expr": "$.InternalDate",
        "order": -1,
        "mode": "FULL INDEX SCAN(date_index)",
        "cost": 100
    },
    "lookup":
    {
        "loader": "document",
        "fields": "$"
    },
    "select":
    {
        "expr": "$",
        "all": false
    }
}
OrderBy ONLY: 5 ms
{
    "collection": "CThreads",
    "snaphost": "read",
    "pipe": "queryPipe",
    "index":
    {
        "name": "label_index",
        "expr": "$.LabelIds[*]",
        "order": 1,
        "mode": "INDEX SEEK(label_index = \"NOTICE\")",
        "cost": 10
    },
    "lookup":
    {
        "loader": "document",
        "fields": "$"
    },
    "orderBy":
    {
        "expr": "$.InternalDate",
        "order": -1
    },
    "select":
    {
        "expr": "$",
        "all": false
    }
}
Both Where and OrderBy : 1239 ms