level-search
dominictarr opened this issue · comments
Hi,
I just noticed this module yesterday, which was exciting because I had just implemented a search API for level (level-search).
basically, each pair of keys in every document are indexed:
{ name: 'foo',
repository: { type: 'git', url: 'https://github.com/foofoo/foo.git'},
dependencies: {
bar: '1.0.0'
}
}
would index that file by the pairs:
name foo
repository type
repository url
type git
url https://github.com/foofoo/foo.git
dependencies bar
bar 1.0.0
this makes it possible to do nearly any search,
you could probably get really clever, but as a first step,
I just search on the pair that seems the least likely,
and then filter.
Is there a way to get the keys involved in the query from jsonquery, so that I could combine it with level-search?
With LevelUp you can specify whether you'd like the levelup keys and values to be present in the data passed through pipe(). If keys is true, then the key can be used for the filtering.
Here's an example:
var levelup = require('levelup')
  , rimraf = require('rimraf')
  , jsonquery = require('jsonquery');

// Remove any database left over from a previous run before opening it.
var dbPath = __dirname + '/data/mydb';
rimraf.sync(dbPath);

// Values are JSON-encoded, so the read stream hands them back as objects.
var db = levelup(dbPath, {
  valueEncoding: 'json'
});

// Seed 100 documents: key i -> { name: 'name i', num: 10 * i }.
var batch = [];
for (var i = 0; i < 100; i++) {
  var obj = {
    name: 'name ' + i,
    num: 10*i
  };
  batch.push({ type: 'put', key: i, value: obj });
}

db.batch(batch, function (err) {
  // Surface a failed seed write instead of querying an incomplete database.
  if (err) throw err;
  query();
});

/**
 * Streams every { key, value } entry of the database through jsonquery and
 * prints the entries whose key is >= 90 and whose value.num is a multiple
 * of 20.
 */
function query() {
  db
    .createReadStream({ keys: true, values: true })
    .pipe(jsonquery({ key: { $gte: 90 }, 'value.num': { $mod: [ 20, 0 ] } }))
    .on('data', console.log);
}
Which will print out:
$ node test.js
{ key: '90', value: { name: 'name 90', num: 900 } }
{ key: '92', value: { name: 'name 92', num: 920 } }
{ key: '94', value: { name: 'name 94', num: 940 } }
{ key: '96', value: { name: 'name 96', num: 960 } }
{ key: '98', value: { name: 'name 98', num: 980 } }
Is that the sort of thing you were after?
In any case you can see the full lists of tests here
As long as the keys and the values are JavaScript objects and the data
objects from the stream are JavaScript objects, jsonquery will be able to filter them.
Hmm, I think I didn't explain level-search
very well.
level-search indexes json documents in levelup by each pair of keys.
So, say I want to search for objects that have {name: 'jsonquery'}
(i.e. get the package.json for all versions of jsonquery).
That is only 5 versions, but if I was just going to use jsonquery against the whole of npm... that is 172000 package.json files
(one per module version, of course). Clearly, it's going to be way faster to query just 5 objects.
So, if I did a query like {'value.name': 'jsonquery', 'dist.mtime': '$max'},
I need to figure out which are the best keys to use for the query (in this case the pair "name", "jsonquery"
would be the right choice).
So, I was wondering if there was an easy way that jsonquery could give the pairs I want to look for,
like is there an intermediate data structure jsonquery creates that I can use?
Am i making sense?
OK. I see what you mean. You want to take a JSON query, and then get a list of the keys that would be used for the query.
There's nothing really in JSON query in terms of internal data structures that would help you.
But I was in the process of writing a query optimizer system (JSONQueryOptimizer - not released yet) that would feed into this. You could define indexes on levelup (do an ensureIndex
on a field in your JSON data and have levelup index it using a sublevel), then pass a JSON query together with a list of indexes, and it would tell you which indexes to use. You would then feed the stream of potential data into jsonquery to get the actual results.
Here's what I was working on (sorry it's in CoffeeScript, I was going through a phase 😄):
# Query optimizer entry point.
#
# indexes  - list of indexed field names (the matchers test membership
#            with `in`)
# query    - query predicate object
# callback - called synchronously as callback(null, plan), where plan is an
#            array holding the task returned by optimizerMatch, or an empty
#            array when optimizerMatch produced nothing
#
# Returns whatever the callback returns.
optimizer = (indexes, query, callback) ->
  rootTask = optimizerMatch(indexes, query)
  steps = if rootTask then [rootTask] else []
  callback(null, steps)
# Finds the first index-backed task for a predicate object.
#
# indexes   - list of indexed field names
# predicate - object mapping field names or '$' operators to their operands
#
# Keys are visited in enumeration order and the first one that yields a task
# wins; the remaining keys are not inspected. Returns the task object, or
# undefined when no key can be served from an index.
optimizerMatch = (indexes, predicate) ->
  for n, v of predicate
    # LHS $and, $or, $not
    if n[0] == '$'
      task = optimizerOperator indexes, n, v
      return task if task
    # complex RHS predicate (eg. $in, $gt). The `?.` guard keeps a null or
    # undefined value from throwing on `.constructor`; such values fall
    # through to the simple lookup like any other scalar.
    else if v?.constructor == Object
      task = optimizeValOpMatch(indexes, n, v)
      return task if task
    # simple lookup: only build a task when the field is actually indexed
    else if n in indexes
      task = {}
      task[n] = [v]
      return task
  return
# Plans a top-level logical operator ($or, $and, $not).
#
# indexes   - list of indexed field names
# op        - the operator name
# predicate - the operator's operand (an array of predicates for $or/$and)
#
# Returns a task object, null when the operator forces a table scan, or
# undefined when nothing in the operand can use an index.
optimizerOperator = (indexes, op, predicate) ->
  switch op
    when '$or', '$and'
      tasks = []
      for part in predicate
        t = optimizerMatch(indexes, part)
        if t
          tasks.push t
        else if op == '$or'
          # A single $or branch without an index means the union cannot be
          # built from indexes alone, so the whole operator is a table scan.
          # ($and can simply skip such a branch: the remaining indexed
          # branches still bound the candidate set.)
          return null
      if tasks.length > 1
        task = {}
        task[op] = tasks
        return task
      else if tasks.length == 1
        return tasks[0]
    when '$not'
      # An index lookup for the negated predicate would fetch exactly the
      # documents the filter is going to reject, so the candidate stream
      # would miss every real match. Report a table scan instead.
      return null
  return
# Handles a complex RHS predicate (eg. $in, $gt) attached to one field.
#
# Walks the predicate's keys, hands every key that starts with '$' to
# optimizeValOp together with the field name `val`, and returns the first
# truthy task it gets back. Keys that are not operators are skipped.
# Returns undefined when no operator yields a task.
optimizeValOpMatch = (indexes, val, predicate) ->
  for opName, opArgs of predicate
    # keys must be an operator
    continue unless opName[0] is '$'
    candidate = optimizeValOp(indexes, opName, val, opArgs)
    return candidate if candidate
  return
# operators on the RHS of queries
#
# indexes - list of indexed field names
# op      - the operator name (eg. '$in', '$gt')
# val     - the field name the operator is applied to
# args    - the operator's operand
#
# Returns a task object, null when the operator rules out using an index for
# this field, or undefined when no index applies.
#
# NOTE(review): $all and $elemMatch store a bare value under the field name,
# while $in (and the simple lookup in optimizerMatch) store an array of keys.
# Left as-is to keep the plan shape unchanged; confirm what the plan consumer
# expects.
optimizeValOp = (indexes, op, val, args) ->
  switch op
    when '$in'
      if val in indexes
        task = {}
        task[val] = args
        return task
    when '$nin', '$gt', '$gte', '$ne', '$lt', '$lte', '$mod'
      # can't use an index lookup for these; a null key list marks an
      # index scan
      if val in indexes
        task = {}
        task[val] = null # index scan
        return task
    when '$all'
      if val in indexes
        tasks = []
        for part in args
          task = {}
          task[val] = part
          tasks.push task
        if tasks.length > 1
          return { $and: tasks }
        else if tasks.length == 1
          return tasks[0]
    when '$elemMatch'
      tasks = []
      for part, v of args
        # sub-fields are looked up under the dotted path "val.part"
        key = [ val, part ].join('.')
        if key in indexes
          task = {}
          task[key] = v
          tasks.push task
      if tasks.length > 1
        return { $and: tasks }
      else if tasks.length == 1
        return tasks[0]
    when '$or', '$and'
      tasks = []
      for part in args
        task = optimizeValOpMatch(indexes, val, part)
        if task
          tasks.push task
        else if op == '$or'
          # Same rule as the top-level $or: a branch that cannot be served
          # from an index makes the union incomplete, so give up on indexes
          # rather than silently dropping that branch's matches.
          return null
      if tasks.length > 1
        task = {}
        task[op] = tasks
        return task
      else if tasks.length == 1
        return tasks[0]
    when '$not'
      # Looking up the negated operand would fetch the documents the filter
      # rejects and miss the ones it accepts, so no index task is produced.
      return null
  return
module.exports = optimizer
used like this:
var optimize = require('./lib/jsonqueryoptimizer');

// Fields for which an index is available.
var indexedFields = ['name', 'phone', 'num'];

// Match documents whose name is 'Eugene' and whose num is 423.
var criteria = { $and: [ { name: 'Eugene' }, { num: 423 } ] };

/**
 * Receives the plan computed by the optimizer.
 *
 * @param {*} err - error slot of the callback
 * @param {Array} plan - the computed plan
 */
function onPlan(err, plan) {
  // plan is: [{"$and":[{"name":["Eugene"]},{"num":[423]}]}]
  // which is:
  // get a stream using the "name" index (looking up with key "Eugene"), and
  // then AND it with the stream using the num index with key 423
}

optimize(indexedFields, criteria, onPlan);
Probably not what you need, but it might be a starting point to get what you need.