add some basic generators for strings
nikhedonia opened this issue · comments
I'd like to see 'trine-based' parsers in the future. But first we need some basic functionality for strings. I have something like this in mind:
// Sketch of the proposed string helpers; `chars` and `concatUntill` are the
// functions being requested in this issue.
var str ="hello world from trine"
str::chars() // yields each character
str::chars()::concatUntill(' ');
/* concatenates chars until a whitespace character is found
 * and yields the substring
 * result here: 'hello', 'world', 'from', 'trine'
 * I guess this can also be done with multiple passes of takeUntill and reduce...
 */
This must be well thought out, but at least chars() is a must-have.
Strings are actually iterable out of the box:
"use strict";
import { sortAlphabetically } from "trine/iterable/sortAlphabetically";
import { uniq } from "trine/iterable/uniq";
import { reduce } from "trine/iterable/reduce";
import { equals } from "trine/boolean/equals";
// A string literal is used directly as the iterable: it is piped through
// sortAlphabetically, uniq(equals) and reduce(String.prototype.concat, ""),
// and the first value produced by the resulting iterator is logged.
var chars = "fookasdoASDIJQWIJASDIASXIMASXIMASDWIQ"
::sortAlphabetically()
::uniq(equals)
::reduce(String.prototype.concat, "")
.next()
.value;
console.log(chars);
Hmm, interesting idea. Maybe something more generic though, so it can be used for non-strings, and things other than concatenation as well, but I'm not sure how that would look/work...
There should probably be a `::to` overload for `String`s btw, since they're a collection type too (unfortunately the only collection type in JS that is treated as a value).
One primitive for parsing (and other things) could be the concept of a peekable (I just made that up, but I hope it's rather self-explanatory). Basically, it would allow peeking and advancing, while yielding tokens. Here's a crude example implementation:
"use strict";
import { takeWhile } from "trine/iterable/takeWhile";
import { drop } from "trine/iterable/drop";
import { map } from "trine/iterable/map";
/**
 * Turns the iterable bound as `this` into a lazy stream of tokens.
 *
 * For every token, `tokenizer` is called with `this` set to
 * `{ position }` (the number of source items consumed so far) and with a
 * `peek` function. Each `peek()` call returns a fresh iterator over the
 * not-yet-consumed items, starting at the current position; looking ahead
 * never consumes anything. The tokenizer returns `[advanceCount, token]`:
 * `token` is yielded and `advanceCount` source items are consumed.
 *
 * The tokenizer is only invoked while at least one unconsumed item remains,
 * so an empty source yields no tokens. Iterators obtained from `peek()` are
 * only meaningful during the tokenizer call in which they were created.
 *
 * @template T
 * @this {Iterable<*>} the source to tokenize (e.g. a string)
 * @param {function(this: {position: number}, function(): Iterable<*>): [number, T]} tokenizer
 * @returns {Iterable<T>} the tokens, produced on demand
 * @throws {Error} "Infinite loop!" if the tokenizer returns an advance count
 *     that is not greater than zero.
 */
function * peekable (tokenizer) {
    const iterator = this[Symbol.iterator]();
    let position = 0;
    let done = false;
    let buffer = [];

    // Moves one item from the source into the look-ahead buffer.
    // Returns false (and remembers it) once the source is exhausted.
    function pull () {
        if ( done ) {
            return false;
        }
        const item = iterator.next();
        if ( item.done ) {
            done = true;
            return false;
        }
        buffer.push(item.value);
        return true;
    }

    // Index-based walk over the shared buffer, so that several look-ahead
    // iterators created in the same tokenizer call all see every item, even
    // when another one of them was the one that pulled it from the source.
    function * peek () {
        for ( let index = 0; index < buffer.length || pull(); index++ ) {
            yield buffer[index];
        }
    }

    // Pre-fetch one item before each tokenizer call: this is what detects the
    // end of input when the tokenizer did not peek past the last item.
    while ( buffer.length > 0 || pull() ) {
        const [advanceCount, token] = tokenizer.call({ position }, peek);
        if ( !( advanceCount > 0 ) ) {
            throw new Error("Infinite loop!");
        }
        yield token;
        // The tokenizer may advance further than it peeked; consume those
        // items from the source too so `position` stays in sync with it.
        while ( buffer.length < advanceCount && pull() ) {
            // keep pulling
        }
        buffer = buffer.slice(advanceCount);
        position += advanceCount;
    }
}
function parseWhitespace (peek) {
const spaces = [...peek()::takeWhile(function () { return this === " "; })];
if ( spaces.length > 0 ) {
return {
type: "whitespace",
start: this.position,
end: this.position + spaces.length,
};
}
return null;
}
function parseWord (peek) {
const word = [...peek()::takeWhile(function () { return this !== " "; })].join("");
if ( word.length > 0 ) {
return {
type: "word",
value: word,
start: this.position,
end: this.position + word.length,
};
}
return null;
}
// Example: tokenize a sentence with `peekable`, then post-process the tokens.
var tokens = "hello world from trine"
::peekable(function parser (peek) {
// Try a word first and fall back to whitespace; `this` ({ position }) is
// forwarded to the sub-parsers so they can compute start/end offsets.
let token;
token = this::parseWord(peek);
if ( !token ) { token = this::parseWhitespace(peek); }
if ( !token ) {
throw new SyntaxError("Unexpected `" + peek().next().value + "` at " + this.position);
}
// peekable expects [advanceCount, token]; the token's span is the count.
return [token.end - token.start, token];
})
// NOTE(review): `drop` is given a predicate here, apparently to discard
// whitespace tokens; confirm that trine's `drop` accepts a predicate.
::drop(function () { return this.type === "whitespace"; })
::map(function () { return this.value; });
console.log([...tokens]);
While writing this, I realized we really need `dropHead` and `dropTail` functions. :/