jussi-kalliokoski / trine

A utility library for modern JavaScript.

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

add some basic generators for strings

nikhedonia opened this issue · comments

I'd like to see 'trine-based' parsers in the future. But first we need some basic functionality for strings. I have something like this in mind:

// Sketch of the proposed API: `chars` and `concatUntill` are suggestions made in this issue,
// written in trine's `::` (function bind) call style.
var str ="hello world from trine"
str::chars() // yields each character of the string, one at a time
str::chars()::concatUntill(' ');
/*  concatenates chars until a whitespace character is found
  *  and yields the substring
  *  result here: 'hello', 'world', 'from', 'trine'
  *  I guess this can also be done with multiple passes of takeUntill and reduce...
  */

This must be well thought out, but at least chars() is a must-have.

Strings are actually iterable out of the box:

"use strict";

import { sortAlphabetically } from "trine/iterable/sortAlphabetically";
import { uniq } from "trine/iterable/uniq";
import { reduce } from "trine/iterable/reduce";
import { equals } from "trine/boolean/equals";

var chars = "fookasdoASDIJQWIJASDIASXIMASXIMASDWIQ"
    ::sortAlphabetically()
    ::uniq(equals)
    ::reduce(String.prototype.concat, "")
    .next()
    .value;

console.log(chars);

Hmm, interesting idea. Maybe something more generic though, so it can be used for non-strings, and things other than concatenation as well, but I'm not sure how that would look/work...

There should probably be a ::to overload for Strings btw, since they're a collection type too (unfortunately the only collection type in JS that is treated as a value).

One primitive for parsing (and other things) could be the concept of a peekable (I just made that up, but I hope it's rather self-explanatory). Basically would allow peeking and advancing, while yielding tokens. Here's a crude example implementation:

"use strict";

import { takeWhile } from "trine/iterable/takeWhile";
import { drop } from "trine/iterable/drop";
import { map } from "trine/iterable/map";

function * peekable <T> (
    tokenizer : (_this : { position: number }, peek: () => Iterable<T>) => [number, T],
) : Iterable<T> {
    const iterator = this[Symbol.iterator]();
    let position = 0;
    let done = false;
    let buffer = [];

    function * peek () {
        yield * buffer;

        while ( true ) {
            const item = iterator.next();

            if ( item.done ) {
                done = true;
                return;
            }

            buffer.push(item.value);
            yield item.value;
        }
    }

    while ( !done || buffer.length > 0 ) {

        const [advanceCount, token] = ({ position })::tokenizer(peek);

        if ( !( advanceCount > 0 ) ) {
            throw new Error("Infinite loop!");
        }

        yield token;

        buffer = buffer.slice(advanceCount);
        position += advanceCount;
    }
}

function parseWhitespace (peek) {
    const spaces = [...peek()::takeWhile(function () { return this === " "; })];

    if ( spaces.length > 0 ) {
        return {
            type: "whitespace",
            start: this.position,
            end: this.position + spaces.length,
        };
    }

    return null;
}

function parseWord (peek) {
    const word = [...peek()::takeWhile(function () { return this !== " "; })].join("");

    if ( word.length > 0 ) {
        return {
            type: "word",
            value: word,
            start: this.position,
            end: this.position + word.length,
        };
    }

    return null;
}

var tokens = "hello world from trine"
    ::peekable(function parser (peek) {
        let token;

        token = this::parseWord(peek);
        if ( !token ) { token = this::parseWhitespace(peek); }

        if ( !token ) {
            throw new SyntaxError("Unexpected `" + peek().next().value + "` at " + this.position);
        }

        return [token.end - token.start, token];
    })
    ::drop(function () { return this.type === "whitespace"; })
    ::map(function () { return this.value; });

console.log([...tokens]);

While writing this, I realized we really need dropHead and dropTail functions. :/