apache / druid

Apache Druid: a high performance real-time analytics database.

Home Page:https://druid.apache.org/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Adding LIMIT to MV_TO_ARRAY then UNNEST will result in null and NullPointerException

ccyjoshua opened this issue · comments

Affected Version

28.0.1

Description

  • Cluster size

6 historicals, others are single service running

  • Configurations in use

$ cat common.runtime.properties

#
# Extensions
#

druid.extensions.directory=extensions
druid.extensions.hadoopDependenciesDir=hadoop-dependencies
druid.extensions.loadList=["druid-kafka-indexing-service","druid-datasketches","druid-multi-stage-query","druid-s3-extensions","druid-avro-extensions","druid-parquet-extensions","mysql-metadata-storage","druid-histogram","druid-lookups-cached-global","statsd-emitter"]

#
# Logging
#

# Log all runtime properties on startup. Disable to avoid logging properties on startup:
druid.startup.logging.logProperties=true

#
# Zookeeper
#

druid.zk.service.host=ae-druid-zk-1001.bdp.com,ae-druid-zk-1002.bdp.com,ae-druid-zk-1003.bdp.com,ae-druid-zk-1004.bdp.com,ae-druid-zk-1005.bdp.com
druid.zk.paths.base=amp-druid-dev

#
# Metadata storage
#

# For MySQL:
## druid.metadata.storage.type=mysql
druid.metadata.storage.type=mysql
druid.metadata.storage.connector.connectURI=jdbc:mysql://adi-druid-dev-db.bdp.com:3306/druid
druid.metadata.storage.connector.user=druid_user
druid.metadata.storage.connector.password=****

#
# Deep storage
#

# For S3:
druid.storage.type=s3
druid.storage.bucket=****
druid.storage.baseKey=druid/amp-druid-dev/segments

#
# Indexing service logs
#

# For S3:
druid.indexer.logs.type=s3
druid.indexer.logs.s3Bucket=****
druid.indexer.logs.s3Prefix=infrastructure/druid-dev/indexing-logs

#
# Service discovery
#

druid.selectors.indexing.serviceName=amp-druid-dev/overlord
druid.selectors.coordinator.serviceName=amp-druid-dev/coordinator

#
# Monitoring
#
druid.emitter=statsd
druid.emitter.statsd.prefix=adi
druid.emitter.statsd.includeHost=true
druid.emitter.logging.logLevel=debug
druid.emitter.statsd.logLevel=debug

# Configure `statsd-emitter` endpoint
druid.emitter.statsd.hostname=127.0.0.1
druid.emitter.statsd.port=8125

# Configure `statsd-emitter` to use dogstatsd format. Must be set to true, otherwise tags are not reported correctly to Datadog.
druid.emitter.statsd.dogstatsd=true
druid.emitter.statsd.dogstatsdServiceAsTag=true

druid.javascript.enabled=true

#
# Security
#
druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.metadata.storage.connector.password", "password", "key", "token", "pwd"]


#
# SQL
#
druid.sql.enable=true

# Planning SQL query when there is aggregate distinct in the statement
druid.sql.planner.useGroupingSetForExactDistinct=true

#
# Lookups
#
druid.lookup.enableLookupSyncOnStartup=false

#
# Expression processing config
#
druid.expressions.useStrictBooleans=true

#
# Http client
#

$ cat query/broker/runtime.properties

# Broker Process Configs

druid.host=amp-druid-dev-query-1001.bdp.com
druid.service=amp-druid-dev/broker
druid.plaintextPort=8082

# Query configuration

## Query Routing

druid.broker.balancer.type=connectionCount
druid.broker.select.tier=highestPriority

## Query prioritization

druid.query.scheduler.numThreads=95
druid.query.scheduler.prioritization.strategy=threshold
druid.query.scheduler.prioritization.periodThreshold=P30D
druid.query.scheduler.prioritization.adjustment=10

## Query laning

druid.query.scheduler.laning.strategy=hilo
druid.query.scheduler.laning.maxLowPercent=25

# Server Configuration

druid.server.http.numThreads=100
druid.server.http.queueSize=100
druid.server.http.maxIdleTime=PT5M
druid.server.http.enableRequestLimit=true
druid.server.http.defaultQueryTimeout=120000
druid.server.http.maxQueryTimeout=150000

## Guardrails for materialization of subqueries

druid.server.http.maxSubqueryRows=2147483647

# druid.server.http.maxSubqueryBytes=auto
# This config is experimental and currently breaks WINDOW_FUNCTIONS

# Client Configuration

druid.broker.http.numConnections=32
druid.broker.http.numMaxThreads=80
druid.broker.http.maxQueuedBytes=500000000

# Retry Policy

druid.broker.retryPolicy.numTries=1

# Processing

druid.processing.buffer.sizeBytes=536870912
druid.processing.numMergeBuffers=10
druid.processing.fifo=true
druid.processing.merge.useParallelMergePool=true
druid.processing.numThreads=31
druid.processing.tmpDir=/tmp/druid/processing

# SQL

druid.sql.enable=true
druid.sql.avatica.maxConnections=100
druid.sql.avatica.maxStatementsPerConnection=5
druid.sql.http.enable=true

# Broker Caching

druid.broker.cache.useCache=false
druid.broker.cache.populateCache=false

# Group by
druid.query.groupBy.maxOnDiskStorage=10737418240

# Monitoring

druid.monitoring.monitors=["org.apache.druid.client.cache.CacheMonitor","org.apache.druid.java.util.metrics.CpuAcctDeltaMonitor","org.apache.druid.java.util.metrics.JvmMonitor","org.apache.druid.java.util.metrics.JvmThreadsMonitor","org.apache.druid.server.metrics.EventReceiverFirehoseMonitor","org.apache.druid.server.metrics.HistoricalMetricsMonitor","org.apache.druid.server.metrics.QueryCountStatsMonitor"]
druid.monitoring.emissionPeriod=PT15S

$ cat query/router/runtime.properties

# Router Process Configs

druid.host=amp-druid-dev-query-1001.bdp.com
druid.service=amp-druid-dev/router
druid.plaintextPort=8088

# Runtime Configuration

druid.router.defaultBrokerServiceName=amp-druid-dev/broker
druid.router.coordinatorServiceName=amp-druid-dev/coordinator
druid.router.tierToBrokerMap={"_default_tier":"amp-druid-dev/broker"}
druid.router.strategy=connectionCount
druid.router.strategy.connectionCount.loadBalancingCoefficient=1
druid.router.strategy.connectionCount.queryCapacityCoefficient=1
druid.router.avatica.balancer.type=consistentHash
druid.router.managementProxy.enabled=true
druid.router.http.numConnections=50
druid.router.http.eagerInitialization=true
druid.router.http.readTimeout=PT2M
druid.router.http.numMaxThreads=100
druid.router.sql.enable=true

# Monitoring

druid.monitoring.monitors=["org.apache.druid.client.cache.CacheMonitor","org.apache.druid.java.util.metrics.CpuAcctDeltaMonitor","org.apache.druid.java.util.metrics.JvmMonitor","org.apache.druid.java.util.metrics.JvmThreadsMonitor","org.apache.druid.server.metrics.EventReceiverFirehoseMonitor","org.apache.druid.server.metrics.QueryCountStatsMonitor"]
druid.monitoring.emissionPeriod=PT15S

$ cat data/historical/runtime.properties

# Historical Process Configuration

druid.host=amp-druid-dev-query-1001.bdp.com
druid.service=amp-druid-dev/historical
druid.plaintextPort=8083

# istorical General Configuration

druid.server.maxSize=3400000000000
druid.server.tier=_default_tier
druid.server.priority=0

# Storing Segments

druid.segmentCache.locations=[{"path":"/opt/druid-segment-cache","maxSize"\:3400000000000}]
druid.segmentCache.locationSelector.strategy=leastBytesUsed

# Historical Query Configs

druid.server.http.numThreads=300
druid.server.http.maxQueryTimeout=120000
druid.server.http.defaultQueryTimeout=120000

# Processing

druid.processing.buffer.sizeBytes=536870912
druid.processing.numMergeBuffers=4
druid.processing.numThreads=15
druid.processing.fifo=true

# Historical Caching

druid.historical.cache.useCache=true
druid.historical.cache.populateCache=true
druid.cache.type=caffeine

# druid.cache.sizeInBytes=20G bytes = 21_474_836_480 bytes; min(1GiB, Runtime.maxMemory / 10)
druid.cache.sizeInBytes=21474836480

# 7 Days
druid.cache.expireAfter=604800000
druid.cache.evictOnClose=true
druid.cache.cacheExecutorFactory=COMMON_FJP

# GroupBy Query Config

# druid.query.groupBy.maxOnDiskStorage=8G bytes = 8_000_000_000 bytes
druid.query.groupBy.maxOnDiskStorage=8000000000

# Monitoring

druid.monitoring.monitors=["org.apache.druid.client.cache.CacheMonitor","org.apache.druid.java.util.metrics.CpuAcctDeltaMonitor","org.apache.druid.java.util.metrics.JvmMonitor","org.apache.druid.java.util.metrics.JvmThreadsMonitor","org.apache.druid.server.metrics.EventReceiverFirehoseMonitor","org.apache.druid.server.metrics.HistoricalMetricsMonitor","org.apache.druid.serve
  • Steps to reproduce the problem

Ingest inline data using spec below.

{
  "type": "index_parallel",
  "spec": {
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "inline",
        "data": "{\"timestamp\":\"2019-08-25T00:00:00.031Z\",\"agent_type\":\"Browser\",\"lang1\":[\"es\",\"es-419\"],\"lang2\":[\"a\",\"b\"],\"session_length\":3}\n{\"timestamp\":\"2019-08-25T00:00:00.031Z\",\"agent_type\":\"Browser\",\"lang1\":[\"en\",\"en-us\"],\"lang2\":[\"c\",\"d\"],\"session_length\":5}\n{\"timestamp\":\"2019-08-25T00:00:00.031Z\",\"agent_type\":\"TV\",\"lang1\":[\"es\"],\"lang2\":[\"a\"],\"session_length\":7}\n{\"timestamp\":\"2019-08-25T00:00:00.031Z\",\"agent_type\":\"TV\",\"lang1\":[\"es\",\"es-419\"],\"lang2\":[\"a\",\"b\"],\"session_length\":1}"
      },
      "inputFormat": {
        "type": "json"
      }
    },
    "tuningConfig": {
      "type": "index_parallel",
      "partitionsSpec": {
        "type": "dynamic"
      }
    },
    "dataSchema": {
      "dataSource": "mv_debug_sample",
      "timestampSpec": {
        "column": "timestamp",
        "format": "iso"
      },
      "dimensionsSpec": {
        "dimensions": [
          "agent_type",
          "lang1",
          "lang2",
          {
            "type": "long",
            "name": "session_length"
          }
        ]
      },
      "granularitySpec": {
        "queryGranularity": "none",
        "rollup": false,
        "segmentGranularity": "day"
      }
    }
  }
}

Run query below from UI. Removing LIMIT will resolve the error.

WITH a AS (
  SELECT 
    "agent_type",
    CASE "agent_type" WHEN 'Browser' THEN MV_TO_ARRAY("lang1") ELSE MV_TO_ARRAY("lang2") END AS lang,
    "session_length"
  FROM mv_debug_sample
  LIMIT 5
)
SELECT * FROM a CROSS JOIN unnest(lang) AS t(elem)
  • The error message or stack traces encountered. Providing more context, such as nearby log messages or even entire logs, can be helpful.

Error message from UI:

Error: RUNTIME_FAILURE (OPERATOR)
null
java.lang.NullPointerException

Error in the log with debug=true:

024-03-10T00:11:12,891 INFO [sql[a88cf147-28fb-45ae-b692-0bf3f642afa5]] org.apache.druid.sql.calcite.planner.CalciteRulesManager - Start
LogicalSort(fetch=[1001:BIGINT]): rowcount = 5.0, cumulative cost = {inf}, id = 1543612
  LogicalProject(agent_type=[$0], lang=[$1], session_length=[$2], elem=[$3]): rowcount = 5.0, cumulative cost = {inf}, id = 1543606
    LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}]): rowcount = 5.0, cumulative cost = {inf}, id = 1543604
      LogicalSort(fetch=[5]): rowcount = 5.0, cumulative cost = {205.0 rows, 521.0 cpu, 0.0 io}, id = 1543595
        LogicalProject(agent_type=[$1], lang=[CASE(=($1, 'Browser'), MV_TO_ARRAY($2), MV_TO_ARRAY($3))], session_length=[$4]): rowcount = 100.0, cumulative cost = {200.0 rows, 401.0 cpu, 0.0 io}, id = 1543593
          LogicalTableScan(table=[[druid, mv_debug_sample]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}, id = 1543567
      LogicalProject(elem=[$0]): rowcount = 1.0, cumulative cost = {4.0 rows, 4.0 cpu, 0.0 io}, id = 1543602
        Uncollect: rowcount = 1.0, cumulative cost = {3.0 rows, 3.0 cpu, 0.0 io}, id = 1543600
          LogicalProject(lang=[$cor0.lang]): rowcount = 1.0, cumulative cost = {2.0 rows, 2.0 cpu, 0.0 io}, id = 1543598
            LogicalValues(tuples=[[{ 0 }]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io}, id = 1543570

2024-03-10T00:11:12,892 INFO [sql[a88cf147-28fb-45ae-b692-0bf3f642afa5]] org.apache.druid.sql.calcite.planner.CalciteRulesManager - After PreProgram
LogicalSort(fetch=[1001:BIGINT]): rowcount = 5.0, cumulative cost = {inf}, id = 1543688
  LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}]): rowcount = 5.0, cumulative cost = {inf}, id = 1543686
    LogicalProject(agent_type=[$1], lang=[CASE(=($1, 'Browser'), MV_TO_ARRAY($2), MV_TO_ARRAY($3))], session_length=[$4]): rowcount = 5.0, cumulative cost = {110.0 rows, 276.0 cpu, 0.0 io}, id = 1543693
      LogicalSort(fetch=[5]): rowcount = 5.0, cumulative cost = {105.0 rows, 261.0 cpu, 0.0 io}, id = 1543690
        LogicalTableScan(table=[[druid, mv_debug_sample]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}, id = 1543567
    Uncollect: rowcount = 1.0, cumulative cost = {3.0 rows, 3.0 cpu, 0.0 io}, id = 1543684
      LogicalProject(lang=[$cor0.lang]): rowcount = 1.0, cumulative cost = {2.0 rows, 2.0 cpu, 0.0 io}, id = 1543682
        LogicalValues(tuples=[[{ 0 }]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io}, id = 1543570

2024-03-10T00:11:12,894 INFO [sql[a88cf147-28fb-45ae-b692-0bf3f642afa5]] org.apache.druid.sql.calcite.planner.CalciteRulesManager - After volcano planner program
DruidCorrelateUnnestRel(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}], query=[{"queryType":"scan","dataSource":{"type":"table","name":"__correlate_unnest__"},"intervals":{"type":"intervals","intervals":
["-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"]},"resultFormat":"compactedList","limit":1001,"columns":["$f0","agent_type","lang","session_length"],"legacy":false,"context":{"debug":true,"queryId":"a8
8cf147-28fb-45ae-b692-0bf3f642afa5","sqlOuterLimit":1001,"sqlQueryId":"a88cf147-28fb-45ae-b692-0bf3f642afa5"},"granularity":{"type":"all"}}], signature=[{agent_type:STRING, lang:ARRAY<STRING>, session_length:LONG, $f0
:STRING}]): rowcount = 1.0, cumulative cost = {100002.59999999999 rows, 1.0 cpu, 0.0 io}, id = 1543742
  DruidQueryRel(query=[{"queryType":"scan","dataSource":{"type":"table","name":"mv_debug_sample"},"intervals":{"type":"intervals","intervals":["-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"]},"virtualC
olumns":[{"type":"expression","name":"v0","expression":"case_searched((\"agent_type\" == 'Browser'),mv_to_array(\"lang1\"),mv_to_array(\"lang2\"))","outputType":"ARRAY<STRING>"}],"resultFormat":"compactedList","limit"
:5,"columns":["agent_type","session_length","v0"],"legacy":false,"context":{"debug":true,"queryId":"a88cf147-28fb-45ae-b692-0bf3f642afa5","sqlOuterLimit":1001,"sqlQueryId":"a88cf147-28fb-45ae-b692-0bf3f642afa5"},"granularity":{"type":"all"}}], signature=[{agent_type:STRING, v0:ARRAY<STRING>, session_length:LONG}]): rowcount = 1.0, cumulative cost = {0.9 rows, 0.0 cpu, 0.0 io}, id = 1543727
  DruidUnnestRel(expr=[$cor0.lang], filter=[null]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io}, id = 1543721

024-03-10T00:11:12,894 WARN [sql[a88cf147-28fb-45ae-b692-0bf3f642afa5]] org.apache.druid.sql.http.SqlResource - Exception while processing sqlQueryId[a88cf147-28fb-45ae-b692-0bf3f642afa5]
org.apache.druid.error.DruidException: null
        at org.apache.druid.error.DruidException$DruidExceptionBuilder.build(DruidException.java:460) ~[druid-processing-28.0.1.jar:28.0.1]
        at org.apache.druid.error.QueryExceptionCompat.makeException(QueryExceptionCompat.java:50) ~[druid-processing-28.0.1.jar:28.0.1]
        at org.apache.druid.error.DruidException.fromFailure(DruidException.java:154) ~[druid-processing-28.0.1.jar:28.0.1]
        at org.apache.druid.server.QueryResultPusher.handleQueryException(QueryResultPusher.java:211) ~[druid-server-28.0.1.jar:28.0.1]
        at org.apache.druid.server.QueryResultPusher.push(QueryResultPusher.java:177) ~[druid-server-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.http.SqlResource.doPost(SqlResource.java:121) ~[druid-sql-28.0.1.jar:28.0.1]
        at jdk.internal.reflect.GeneratedMethodAccessor137.invoke(Unknown Source) ~[?:?]
        at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:?]
        at java.lang.reflect.Method.invoke(Method.java:566) ~[?:?]
        at com.sun.jersey.spi.container.JavaMethodInvokerFactory$1.invoke(JavaMethodInvokerFactory.java:60) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.model.method.dispatch.AbstractResourceMethodDispatchProvider$ResponseOutInvoker._dispatch(AbstractResourceMethodDispatchProvider.java:205) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.model.method.dispatch.ResourceJavaMethodDispatcher.dispatch(ResourceJavaMethodDispatcher.java:75) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.uri.rules.HttpMethodRule.accept(HttpMethodRule.java:302) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.uri.rules.ResourceClassRule.accept(ResourceClassRule.java:108) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.uri.rules.RightHandPathRule.accept(RightHandPathRule.java:147) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.uri.rules.RootResourceClassesRule.accept(RootResourceClassesRule.java:84) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.application.WebApplicationImpl._handleRequest(WebApplicationImpl.java:1542) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.application.WebApplicationImpl._handleRequest(WebApplicationImpl.java:1473) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.application.WebApplicationImpl.handleRequest(WebApplicationImpl.java:1419) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.application.WebApplicationImpl.handleRequest(WebApplicationImpl.java:1409) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.spi.container.servlet.WebComponent.service(WebComponent.java:409) ~[jersey-servlet-1.19.4.jar:1.19.4]
        at com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:558) ~[jersey-servlet-1.19.4.jar:1.19.4]
        at com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:733) ~[jersey-servlet-1.19.4.jar:1.19.4]
        at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) ~[javax.servlet-api-3.1.0.jar:3.1.0]
        at com.google.inject.servlet.ServletDefinition.doServiceImpl(ServletDefinition.java:286) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.ServletDefinition.doService(ServletDefinition.java:276) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.ServletDefinition.service(ServletDefinition.java:181) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.ManagedServletPipeline.service(ManagedServletPipeline.java:91) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:85) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.ManagedFilterPipeline.dispatch(ManagedFilterPipeline.java:120) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.GuiceFilter.doFilter(GuiceFilter.java:135) ~[guice-servlet-4.1.0.jar:?]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.PreResponseAuthorizationCheckFilter.doFilter(PreResponseAuthorizationCheckFilter.java:84) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.initialization.jetty.StandardResponseHeaderFilterHolder$StandardResponseHeaderFilter.doFilter(StandardResponseHeaderFilterHolder.java:164) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.AllowHttpMethodsResourceFilter.doFilter(AllowHttpMethodsResourceFilter.java:78) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.AllowOptionsResourceFilter.doFilter(AllowOptionsResourceFilter.java:74) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.AllowAllAuthenticator$1.doFilter(AllowAllAuthenticator.java:84) ~[druid-server-28.0.1.jar:28.0.1]
        at org.apache.druid.server.security.AuthenticationWrappingFilter.doFilter(AuthenticationWrappingFilter.java:59) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.AllowAllAuthenticator$1.doFilter(AllowAllAuthenticator.java:84) ~[druid-server-28.0.1.jar:28.0.1]
        at org.apache.druid.server.security.AuthenticationWrappingFilter.doFilter(AuthenticationWrappingFilter.java:59) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.SecuritySanityCheckFilter.doFilter(SecuritySanityCheckFilter.java:77) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.initialization.jetty.LimitRequestsFilter.doFilter(LimitRequestsFilter.java:64) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:552) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1624) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:505) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1594) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:772) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:59) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.StatisticsHandler.handle(StatisticsHandler.java:181) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.Server.handle(Server.java:516) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311) ~[jetty-io-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105) ~[jetty-io-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104) ~[jetty-io-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:409) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at java.lang.Thread.run(Thread.java:829) ~[?:?]
Caused by: org.apache.druid.query.QueryInterruptedException
        ... 82 more
Caused by: java.lang.NullPointerException
        at org.apache.druid.sql.calcite.expression.DruidExpression.getDirectColumn(DruidExpression.java:356) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.calcite.rel.DruidCorrelateUnnestRel.toDruidQuery(DruidCorrelateUnnestRel.java:212) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.calcite.rel.DruidRel.runQuery(DruidRel.java:55) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.calcite.planner.QueryHandler.lambda$planWithDruidConvention$6(QueryHandler.java:621) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.calcite.planner.PlannerResult.run(PlannerResult.java:62) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.DirectStatement$ResultSet.run(DirectStatement.java:109) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.http.SqlResource$SqlResourceQueryResultPusher$1.start(SqlResource.java:249) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.server.QueryResultPusher.push(QueryResultPusher.java:121) ~[druid-server-28.0.1.jar:28.0.1]
        ... 81 more
  • Any debugging that you have already done

If I do not use CASE, the array column is null, as showing in query below.

WITH a AS (
  SELECT 
    "agent_type",
    MV_TO_ARRAY("lang1") as lang,
    "session_length"
  FROM mv_debug_sample
  LIMIT 5
)
SELECT * FROM a CROSS JOIN unnest(lang) AS t(elem)

Result:

agent_type	lang	session_length	elem
Browser	null	5	en
Browser	null	5	en-us
Browser	null	3	es
Browser	null	3	es-419
TV	null	7	es
TV	null	1	es
TV	null	1	es-419

This only occurs with LIMIT in subquery, and with UNNEST.
The explained native query is below, which misses mv_to_array as virtual column. So the root cause is native query translation.

{
  "queryType": "scan",
  "dataSource": {
    "type": "unnest",
    "base": {
      "type": "query",
      "query": {
        "queryType": "scan",
        "dataSource": {
          "type": "table",
          "name": "mv_debug_sample"
        },
        "intervals": {
          "type": "intervals",
          "intervals": [
            "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
          ]
        },
        "resultFormat": "compactedList",
        "limit": 5,
        "columns": [
          "agent_type",
          "lang1",
          "session_length"
        ],
        "legacy": false,
        "context": {
          "debug": true,
          "queryId": "305f871b-941b-4102-b337-8e71c29a14a3",
          "sqlOuterLimit": 1001,
          "sqlQueryId": "305f871b-941b-4102-b337-8e71c29a14a3",
          "useNativeQueryExplain": true
        },
        "granularity": {
          "type": "all"
        }
      }
    },
    "virtualColumn": {
      "type": "expression",
      "name": "j0.unnest",
      "expression": "\"lang1\"",
      "outputType": "STRING"
    },
    "unnestFilter": null
  },
  "intervals": {
    "type": "intervals",
    "intervals": [
      "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
    ]
  },
  "resultFormat": "compactedList",
  "limit": 1001,
  "columns": [
    "agent_type",
    "j0.unnest",
    "session_length",
    "v0"
  ],
  "legacy": false,
  "context": {
    "debug": true,
    "queryId": "305f871b-941b-4102-b337-8e71c29a14a3",
    "sqlOuterLimit": 1001,
    "sqlQueryId": "305f871b-941b-4102-b337-8e71c29a14a3",
    "useNativeQueryExplain": true
  },
  "granularity": {
    "type": "all"
  }
}

If I add virtual column with mv_to_array, then it works as expected as below.

{
  "queryType": "scan",
  "dataSource": {
    "type": "unnest",
    "base": {
      "type": "query",
      "query": {
        "queryType": "scan",
        "dataSource": {
          "type": "table",
          "name": "mv_debug_sample"
        },
        "intervals": {
          "type": "intervals",
          "intervals": [
            "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
          ]
        },
        "virtualColumns": [
          {
            "type": "expression",
            "name": "v0",
            "expression": "mv_to_array(\"lang1\")",
            "outputType": "ARRAY<STRING>"
          }
        ],
        "resultFormat": "compactedList",
        "limit": 5,
        "columns": [
          "agent_type",
          "v0",
          "session_length"
        ],
        "legacy": false,
        "context": {
          "debug": true,
          "queryId": "305f871b-941b-4102-b337-8e71c29a14a3",
          "sqlOuterLimit": 1001,
          "sqlQueryId": "305f871b-941b-4102-b337-8e71c29a14a3",
          "useNativeQueryExplain": true
        },
        "granularity": {
          "type": "all"
        }
      }
    },
    "virtualColumn": {
      "type": "expression",
      "name": "j0.unnest",
      "expression": "\"v0\"",
      "outputType": "STRING"
    },
    "unnestFilter": null
  },
  "intervals": {
    "type": "intervals",
    "intervals": [
      "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
    ]
  },
  "resultFormat": "compactedList",
  "limit": 1001,
  "columns": [
    "agent_type",
    "j0.unnest",
    "session_length",
    "v0"
  ],
  "legacy": false,
  "context": {
    "debug": true,
    "queryId": "305f871b-941b-4102-b337-8e71c29a14a3",
    "sqlOuterLimit": 1001,
    "sqlQueryId": "305f871b-941b-4102-b337-8e71c29a14a3",
    "useNativeQueryExplain": true
  },
  "granularity": {
    "type": "all"
  }
}

Created #15891 to address this type of issue