Panic on TIMESTAMP-returning queries over HTTP with more than one value
mildbyte opened this issue · comments
Running this query:
curl -XPOST localhost:8080/q -H "Content-Type: application/json" -d@- <<EOF
{"query": "
WITH ts AS (
SELECT * FROM (VALUES ('2020-01-01 00:00:00'::timestamp)) AS v
)
(SELECT * FROM ts) UNION ALL (SELECT * FROM ts)
"}
EOF
results in a `Trying to access an element at index 1 from a PrimitiveArray of length 1` panic:
thread 'tokio-runtime-worker' panicked at 'Trying to access an element at index 1 from a PrimitiveArray of length 1', /home/mildbyte/.cargo/git/checkouts/arrow-rs-aeceac26ba73ca43/57f79c0/arrow-array/src/array/primitive_array.rs:337:9
stack backtrace:
0: 0x56060d8d214a - std::backtrace_rs::backtrace::libunwind::trace::h08bd4b4334e680d9
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5
1: 0x56060d8d214a - std::backtrace_rs::backtrace::trace_unsynchronized::hf594b03fffc7c3a4
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
2: 0x56060d8d214a - std::sys_common::backtrace::_print_fmt::h1134a35071387263
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/sys_common/backtrace.rs:65:5
3: 0x56060d8d214a - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h69f04f53733d3891
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/sys_common/backtrace.rs:44:22
4: 0x56060d900bff - core::fmt::write::heb1c797211b5fb3d
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/core/src/fmt/mod.rs:1254:17
5: 0x56060d8cd665 - std::io::Write::write_fmt::h0753f8e473762982
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/io/mod.rs:1698:15
6: 0x56060d8d1f15 - std::sys_common::backtrace::_print::h91ef833f3b8da05b
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/sys_common/backtrace.rs:47:5
7: 0x56060d8d1f15 - std::sys_common::backtrace::print::h95ac49619fd683a6
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/sys_common/backtrace.rs:34:9
8: 0x56060d8d374e - std::panicking::default_hook::{{closure}}::hd0ccf67e3d41bed4
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/panicking.rs:269:22
9: 0x56060d8d34f5 - std::panicking::default_hook::he9eeede79fcc44f5
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/panicking.rs:288:9
10: 0x56060d8d3cae - std::panicking::rust_panic_with_hook::hfee790920b2b90a3
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/panicking.rs:690:13
11: 0x56060d8d3ba9 - std::panicking::begin_panic_handler::{{closure}}::h3bfe84e5bbd52252
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/panicking.rs:581:13
12: 0x56060d8d25b6 - std::sys_common::backtrace::__rust_end_short_backtrace::hd97ff4e01dc5cc20
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/sys_common/backtrace.rs:150:18
13: 0x56060d8d3902 - rust_begin_unwind
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/std/src/panicking.rs:577:5
14: 0x560606d2d0d3 - core::panicking::panic_fmt::h6a7ef2d25e2f2c88
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/core/src/panicking.rs:67:14
15: 0x56060d1ea504 - arrow_array::array::primitive_array::PrimitiveArray<T>::value::h06c4a531e630fa22
at /home/mildbyte/.cargo/git/checkouts/arrow-rs-aeceac26ba73ca43/57f79c0/arrow-array/src/array/primitive_array.rs:337:9
16: 0x56060c596977 - <&arrow_array::array::primitive_array::PrimitiveArray<T> as arrow_array::array::ArrayAccessor>::value::h3f301b24e10056e7
at /home/mildbyte/.cargo/git/checkouts/arrow-rs-aeceac26ba73ca43/57f79c0/arrow-array/src/array/primitive_array.rs:711:9
17: 0x56060c6265a8 - <&arrow_array::array::primitive_array::PrimitiveArray<arrow_array::types::TimestampNanosecondType> as arrow_cast::display::DisplayIndexState>::write::hbab3f37a9240d8fa
at /home/mildbyte/.cargo/git/checkouts/arrow-rs-aeceac26ba73ca43/57f79c0/arrow-cast/src/display.rs:465:29
18: 0x56060c699d1e - <arrow_cast::display::ArrayFormat<F> as arrow_cast::display::DisplayIndex>::write::h7aa0695156911352
at /home/mildbyte/.cargo/git/checkouts/arrow-rs-aeceac26ba73ca43/57f79c0/arrow-cast/src/display.rs:361:9
19: 0x56060c68f7b1 - <arrow_cast::display::ValueFormatter as core::fmt::Display>::fmt::ha5bc81ef21ea99da
at /home/mildbyte/.cargo/git/checkouts/arrow-rs-aeceac26ba73ca43/57f79c0/arrow-cast/src/display.rs:162:15
20: 0x56060c68f4a9 - <T as alloc::string::ToString>::to_string::heccf163b66441211
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/alloc/src/string.rs:2532:9
21: 0x56060c0ac717 - arrow_json::writer::set_column_for_json_rows::{{closure}}::h420f7f3184b60b76
at /home/mildbyte/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-json-33.0.0/src/writer.rs:309:25
22: 0x56060c06d18c - core::iter::traits::iterator::Iterator::for_each::call::{{closure}}::ha515900963170f28
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/core/src/iter/traits/iterator.rs:860:29
23: 0x56060c07fa75 - <core::iter::adapters::enumerate::Enumerate<I> as core::iter::traits::iterator::Iterator>::fold::enumerate::{{closure}}::h489b852badad06e2
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/core/src/iter/adapters/enumerate.rs:107:27
24: 0x56060c0a06d1 - core::iter::traits::iterator::Iterator::fold::h6ff9366481b33815
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/core/src/iter/traits/iterator.rs:2488:21
25: 0x56060c07f9eb - <core::iter::adapters::enumerate::Enumerate<I> as core::iter::traits::iterator::Iterator>::fold::hb8ad7f1bf610321b
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/core/src/iter/adapters/enumerate.rs:113:9
26: 0x56060c081cd6 - core::iter::traits::iterator::Iterator::for_each::h048eac0c33342e69
at /rustc/700938c0781c9f135244bb1ec846fe1a5e03ae7d/library/core/src/iter/traits/iterator.rs:863:9
27: 0x56060c0ac5fd - arrow_json::writer::set_column_for_json_rows::hf81407c8110e749e
at /home/mildbyte/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-json-33.0.0/src/writer.rs:305:13
28: 0x56060c0ad4e0 - arrow_json::writer::record_batches_to_json_rows::hb7bc69ec576f8414
at /home/mildbyte/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-json-33.0.0/src/writer.rs:422:17
29: 0x560606e8a5a3 - arrow_json::writer::Writer<W,F>::write_batches::h30f3fdafed90909c
at /home/mildbyte/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-json-33.0.0/src/writer.rs:581:20
30: 0x560606d5dddd - seafowl::frontend::http::physical_plan_to_json::{{closure}}::h701efe4e6b718a67
at /home/mildbyte/seafowl/src/frontend/http.rs:112:5
31: 0x560606d5f87e - seafowl::frontend::http::uncached_read_write_query::{{closure}}::h3359bcf1b25a4a2e
# warp / futures infra backtrace after this
If I remove the `UNION ALL` or cast the timestamp to text before returning it, the error doesn't happen:
~ $ curl -XPOST localhost:8080/q -H "Content-Type: application/json" -d@- <<EOF
{"query": "
WITH ts AS (
SELECT * FROM (VALUES ('2020-01-01 00:00:00'::timestamp)) AS v
)
SELECT * FROM ts
"}
EOF
{"column1":"2020-01-01T00:00:00"}
~ $ curl -XPOST localhost:8080/q -H "Content-Type: application/json" -d@- <<EOF
{"query": "
WITH ts AS (
SELECT * FROM (VALUES ('2020-01-01 00:00:00'::timestamp)) AS v(t)
)
(SELECT t::text FROM ts) UNION ALL (SELECT t::text FROM ts)
"}
EOF
{"ts.t":"2020-01-01T00:00:00"}
{"ts.t":"2020-01-01T00:00:00"}
~ $
Great job sleuthing a minimal repro 🎉
The issue originates from a bug in arrow-json v33.0.0 that we're currently on.
Namely, while iterating over the record batches in `arrow_json::writer::record_batches_to_json_rows`,
an auxiliary vector doesn't get sliced properly, leading to an out-of-bounds access attempt. This has been fixed in newer arrow-json versions (see apache/arrow-rs#3924 and apache/arrow-rs#3934), so we will pick it up eventually (should be in v36.0.0).
In the meantime I can add something along the following lines as a mitigation (this could pose a problem for very large outputs, as it doubles the in-memory size of the collected record batches):
@@ -106,11 +107,12 @@ async fn physical_plan_to_json(
context: Arc<DefaultSeafowlContext>,
physical: Arc<dyn ExecutionPlan>,
) -> Result<Vec<u8>, DataFusionError> {
+ let schema_ref = physical.schema();
let batches = context.collect(physical).await?;
let mut buf = Vec::new();
let mut writer = LineDelimitedWriter::new(&mut buf);
writer
- .write_batches(&batches)
+ .write_batches(&[concat_batches(&schema_ref, batches.iter())?])
.map_err(DataFusionError::ArrowError)?;
writer.finish().map_err(DataFusionError::ArrowError)?;
Ok(buf)