[BUG]OrderBy not using index
mirror222 opened this issue · comments
minghui zhou commented
LiteDB 5.0.18
Net 8.0/6.0
I have a large table with over 500,000 records, and I have created indexes on the LabelIds and InternalDate fields. I am now encountering a performance issue, as shown in the following code:
internal class CThreads
{
[BsonId]
public int UID { get; set; }
public List<string> LabelIds { get; set; } = new List<string>();
public string From { get; set; }
public string Subject { get; set; }
public string Snippet { get; set; }
public long InternalDate { get; set; }
public int IsDeleted { get; set; }
}
var connStr = new ConnectionString(@"filename='test5.db9'");
using (var _conn = new LiteDatabase(connStr))
{
var _dbThreads = _conn.GetCollection<CThreads>();
_dbThreads.EnsureIndex("date_index", x => x.InternalDate);
_dbThreads.EnsureIndex("label_index", x => x.LabelIds);
_dbThreads.EnsureIndex("subject_index", x => x.Subject);
_conn.BeginTrans();
var items = new List<CThreads>();
for (var i = 100000; i < 600000; i++)
{
var item = new CThreads()
{
LabelIds = new List<string> { "NOTICE" },
From = "Customer " + i.ToString(),
Subject = "Subject " + i.ToString(),
Snippet = "Snippet data " + i.ToString(),
InternalDate = 1708300201000 + i * 10,
IsDeleted = 0
};
items.Add(item);
if (i % 50000 == 0)
{
_dbThreads.InsertBulk(items);
items.Clear();
}
}
_conn.Commit();
//Where only test
var watch = Stopwatch.StartNew();
watch.Start();
var query = _dbThreads.Query().Where(x => x.LabelIds.Contains("NOTICE"));
var results = query.Skip(1000).Limit(15).ToList();
watch.Stop();
Console.WriteLine($"Where ONLY : {watch.ElapsedMilliseconds} ms ");
//OrderBy only test
watch.Restart();
query = _dbThreads.Query();
query = query.OrderByDescending(x => x.InternalDate);
results = query.Skip(1000).Limit(15).ToList();
watch.Stop();
Console.WriteLine($"OrderBy ONLY: {watch.ElapsedMilliseconds} ms ");
//Where and OrderBy test
watch.Restart();
query = _dbThreads.Query();
query = query.Where(x => x.LabelIds.Contains("NOTICE")).OrderByDescending(x => x.InternalDate);
results = query.Skip(1000).Limit(15).ToList();
watch.Stop();
Console.WriteLine($"Both Where and OrderBy : {watch.ElapsedMilliseconds} ms ");
}
The execution results are shown in the following figure:
I guess that LiteDB first uses the index to perform the conditional query, and then sorts (FileSort)?
Mauricio David commented
Hi @mirror222
I guess that LiteDB first uses the index to perform the conditional query, and then sorts (FileSort)?
That's it. You can see explain plan in your query using EXPLAIN (in sql) or using GetPlan()
var queryPlan = _dbThreads.Query().Where(x => x.LabelIds.Contains("NOTICE")).GetPlan();
Print(queryPlan);
void Print(BsonDocument doc)
{
var sb = new StringBuilder();
var sw = new StringWriter(sb);
var w = new JsonWriter(sw);
w.Pretty = true;
w.Serialize(doc);
Console.WriteLine(sb);
}
{
"collection": "CThreads",
"snaphost": "read",
"pipe": "queryPipe",
"index":
{
"name": "label_index",
"expr": "$.LabelIds[*]",
"order": 1,
"mode": "INDEX SEEK(label_index = \"NOTICE\")",
"cost": 10
},
"lookup":
{
"loader": "document",
"fields": "$"
},
"select":
{
"expr": "$",
"all": false
}
}
Where ONLY : 26 ms
{
"collection": "CThreads",
"snaphost": "read",
"pipe": "queryPipe",
"index":
{
"name": "date_index",
"expr": "$.InternalDate",
"order": -1,
"mode": "FULL INDEX SCAN(date_index)",
"cost": 100
},
"lookup":
{
"loader": "document",
"fields": "$"
},
"select":
{
"expr": "$",
"all": false
}
}
OrderBy ONLY: 5 ms
{
"collection": "CThreads",
"snaphost": "read",
"pipe": "queryPipe",
"index":
{
"name": "label_index",
"expr": "$.LabelIds[*]",
"order": 1,
"mode": "INDEX SEEK(label_index = \"NOTICE\")",
"cost": 10
},
"lookup":
{
"loader": "document",
"fields": "$"
},
"orderBy":
{
"expr": "$.InternalDate",
"order": -1
},
"select":
{
"expr": "$",
"all": false
}
}
Both Where and OrderBy : 1239 ms