dashbitco / nimble_parsec

A simple and fast library for text-based parser combinators

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

takeP equivalent

foresttoney opened this issue · comments

I often find myself needing to take the next n bytes of the input. For a contrived but representative example:

# Reproduction: match the literal "00", match and discard (`ignore`) a
# five-codepoint UTF-8 string, then parse a one-digit integer.
# `debug: true` is what produces the generated clauses shown below.
defparsec(
  :example_parser,
  string("00")   
  |> ignore(utf8_string([], 5))
  |> integer(1),
  debug: true
)

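The issue is that when using something like ascii_string or utf8_string, you get quite a bit of unnecessary inlining: every skipped character becomes its own pattern segment with its own newline check, even though the matched text is discarded.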

# Matching clause: the input must start with the literal "00", followed by
# five UTF-8 codepoints (x0..x4) and one byte x5 that the guard restricts to
# 48..57 (ASCII "0".."9"). On a match it continues with example_parser__1,
# passing the updated accumulator, line value and offset.
defp example_parser__0(
  <<"00", x0::utf8, x1::utf8, x2::utf8, x3::utf8, x4::utf8, x5, rest::binary>>,
  acc,
  stack,
  context,
  comb__line,
  comb__offset
) when x5 >= 48 and x5 <= 57 do
  example_parser__1(
  rest,
  # x5 - 48 turns the ASCII digit into its integer value; x0..x4 are matched
  # but never added to the accumulator.
  [x5 - 48, "00"] ++ acc,
  stack,
  context,
  # New line value: starting from comb__line, each nested block inspects one
  # codepoint (x0 innermost, x4 outermost). When the codepoint is 10 (newline)
  # the line tuple is replaced by {first element + 1, offset just past that
  # codepoint}; otherwise the value from the previous step is kept.
  (
    line =
      (
        line =
          (
            line =
              (
                line =
                  (
                    line = comb__line

                    case x0 do
                      10 -> {elem(line, 0) + 1, comb__offset + 2 + byte_size(<<x0::utf8>>)}
                      _ -> line
                    end
                  )

                case x1 do
                  10 ->
                    {elem(line, 0) + 1,
                     comb__offset + 2 + byte_size(<<x0::utf8>>) + byte_size(<<x1::utf8>>)}

                  _ ->
                    line
                end
              )

            case x2 do
              10 ->
                {elem(line, 0) + 1,
                 comb__offset + 2 + byte_size(<<x0::utf8>>) + byte_size(<<x1::utf8>>) +
                   byte_size(<<x2::utf8>>)}

              _ ->
                line
            end
          )

        case x3 do
          10 ->
            {elem(line, 0) + 1,
             comb__offset + 2 + byte_size(<<x0::utf8>>) + byte_size(<<x1::utf8>>) +
               byte_size(<<x2::utf8>>) + byte_size(<<x3::utf8>>)}

          _ ->
            line
        end
      )

    case x4 do
      10 ->
        {elem(line, 0) + 1,
         comb__offset + 2 + byte_size(<<x0::utf8>>) + byte_size(<<x1::utf8>>) +
           byte_size(<<x2::utf8>>) + byte_size(<<x3::utf8>>) + byte_size(<<x4::utf8>>)}

      _ ->
        line
    end
  ),
  # New offset: 2 bytes for "00", the encoded size of each of the five
  # codepoints, plus 1 byte for the digit.
  comb__offset + 2 + byte_size(<<x0::utf8>>) + byte_size(<<x1::utf8>>) + byte_size(<<x2::utf8>>) +
    byte_size(<<x3::utf8>>) + byte_size(<<x4::utf8>>) + 1
)
end

# Catch-all clause: matches any arguments, so it is reached when an earlier
# clause of example_parser__0 did not match. Returns an :error tuple carrying
# the expectation message together with the unchanged rest, context, line and
# offset; the accumulator and stack are ignored.
defp example_parser__0(rest, _acc, _stack, context, line, offset) do
  {:error,
 "expected string \"00\", followed by utf8 codepoint, followed by utf8 codepoint, followed by utf8 codepoint, followed by utf8 codepoint, followed by utf8 codepoint, followed by ASCII character in the range \"0\" to \"9\"",
 rest, context, line, offset}
end

# Final step: wraps the accumulated results, remaining input, context, line
# and offset in an :ok tuple. The stack argument is not used.
defp example_parser__1(rest, acc, _stack, context, line, offset) do
  {:ok, acc, rest, context, line, offset}
end

I think it would make sense to have something like takeP from megaparsec. Perhaps it would be called byte and it would compile to something like:

# Proposed API sketch: `byte(5)` is the combinator being requested in this
# issue (name tentative); it would consume exactly five bytes in one segment.
defparsec(
  :example_parser,
  string("00")   
  |> ignore(byte(5))
  |> integer(1),
  debug: true
)

defp example_parser__0(
  <<"00", x0::binary-size(5), x1, rest::binary>>,
  acc,
  stack,
  context,
  comb__line,
  comb__offset
) when x1 >= 48 and x1 <= 57 do
  example_parser__1(
  rest,
  [x1 - 48, "00"] ++ acc,
  stack,
  context, ...

Like takeP, it would return a parse error if there weren't enough tokens (the pattern match fails). I know that compile times can grow when parsing fixed-width files, so perhaps this might help.

Yeah, #30 was meant to add it, but I never got around to it. If you want to send a PR, it will be welcome!

Yeah - this is exactly right. I haven't spent much time diving into the library internals, but I will see what I can come up with.

Closing in favor of PR!