compute() is giving an error
Zhuk66 opened this issue · comments
Here is my example (this example works with the previous version of sparklyr):
library(dplyr)
library(dbplyr)
library(arrow)
library(sparklyr)
library(DBI)
spark_disconnect_all()
conf <- spark_config()
conf$spark.yarn.queue <- "myqueue"
cn <- spark_connect(master = "yarn-client", config = conf, app_name = "snap_test")
tst <- tbl(cn, "mydb.mytable") %>%
head(200) %>%
compute()
Error in db_save_query.DBIConnection():
! Can't save query to "dbplyr_001".
Caused by error:
! org.apache.spark.sql.catalyst.parser.ParseException:
no viable alternative at input 'CREATE OR REPLACE TEMPORARY VIEW \n"dbplyr_001"'(line 2, pos 0)
== SQL ==
CREATE OR REPLACE TEMPORARY VIEW
"dbplyr_001" AS SELECT *
^^^
FROM mydb.mytable
LIMIT 200
at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:241)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:117)
at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:69)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:643)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at sparklyr.Invoke.invoke(invoke.scala:161)
at sparklyr.StreamHandler.handleMethodCall(stream.scala:141)
at sparklyr.StreamHandler.read(stream.scala:62)
at sparklyr.BackendHandler$$anonfun$channelRead0$1.apply$mcV$sp(handler.scala:60)
at scala.util.control.Breaks.breakable(Breaks.scala:38)
at sparklyr.BackendHandler.channelRead0(handler.scala:40)
at sparklyr.BackendHandler.channelRead0(handler.scala:14)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)
at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:324)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:296)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)
at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919)
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:166)
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:719)
at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:655)
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:581)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:750)
Run rlang::last_trace() to see where the error occurred.
20: (function () traceback(2))()
19: stop(fallback)
18: signal_abort(cnd, .file)
17: rlang::abort(message, ..., call = call, use_cli_format = TRUE,
.frame = .frame)
16: cli_abort("Can't save query to {.val {name}}.", parent = cnd)
15: value[3L]
14: tryCatchOne(expr, names, parentenv, handlers[[1L]])
13: tryCatchList(expr, classes, parentenv, handlers)
12: tryCatch(DBI::dbExecute(con, sql, immediate = TRUE), error = function(cnd) {
name <- as.sql(name, con = con)
...
11: db_save_query.DBIConnection(con, ...)
10: eval_bare(expr((!!fun)(con, ...)))
9: dbplyr_fallback(con, "db_save_query", ...)
8: dbplyr_save_query(con, sql, table, temporary = temporary)
7: db_compute.DBIConnection(x$src$con, name, sql, temporary = temporary,
unique_indexes = unique_indexes, indexes = indexes, analyze = analyze,
...
6: db_compute(x$src$con, name, sql, temporary = temporary, unique_indexes = unique_indexes,
indexes = indexes, analyze = analyze, ...)
5: compute.tbl_sql(.)
4: NextMethod()
3: compute.tbl_spark(.)
2: compute(.)
1: tbl(cn, "mydb.mytable") %>% head(200) %>% compute()
This query works if I run it in spark-sql:
spark-sql>
>
>
> CREATE OR REPLACE TEMPORARY VIEW
> "dbplyr_001" AS SELECT *
> FROM mydb.mytable
> LIMIT 200
>
>
Our SPARK version: spark-2.4.8
Session info:
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux
Matrix products: default
BLAS: /opt/revr/ropen/4.0.2/lib64/R/lib/libRblas.so
LAPACK: /opt/revr/ropen/4.0.2/lib64/R/lib/libRlapack.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
[4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] DBI_1.1.3 sparklyr_1.8.2 arrow_12.0.1.1 dbplyr_2.3.3
[5] purrr_1.0.2 dplyr_1.1.2 pryr_0.1.6 devtools_2.4.5
[9] usethis_2.2.2 RevoUtilsMath_11.0.0
loaded via a namespace (and not attached):
[1] RevoUtils_11.0.2 tidyselect_1.2.0 remotes_2.4.2.1 vctrs_0.6.3 generics_0.1.3
[6] miniUI_0.1.1.1 htmltools_0.5.6 yaml_2.3.7 base64enc_0.1-3 utf8_1.2.3
[11] rlang_1.1.1 pkgbuild_1.4.2 urlchecker_1.0.1 later_1.3.1 pillar_1.9.0
[16] withr_2.5.0 glue_1.6.2 bit64_4.0.5 sessioninfo_1.2.2 lifecycle_1.0.3
[21] stringr_1.5.0 htmlwidgets_1.6.2 codetools_0.2-19 memoise_2.0.1 callr_3.7.3
[26] fastmap_1.1.1 httpuv_1.6.3 ps_1.7.5 parallel_4.0.2 fansi_1.0.4
[31] Rcpp_1.0.11 xtable_1.8-4 openssl_2.1.0 promises_1.2.1 cachem_1.0.8
[36] pkgload_1.3.2.1 jsonlite_1.8.7 config_0.3.1 mime_0.12 fs_1.6.3
[41] bit_4.0.5 askpass_1.1 digest_0.6.33 stringi_1.7.12 processx_3.8.2
[46] shiny_1.7.5 cli_3.6.1 tools_4.0.2 magrittr_2.0.3 tibble_3.2.1
[51] profvis_0.3.8 tidyr_1.3.0 crayon_1.5.2 pkgconfig_2.0.3 ellipsis_0.3.2
[56] prettyunits_1.1.1 assertthat_0.2.1 httr_1.4.7 rstudioapi_0.15.0 R6_2.5.1
[61] compiler_4.0.2
I think the issue is with compute(), not collect().
Hi, the issue is in calling mydb.mytable directly in the tbl() command. It will not parse properly. The solution is to use dbplyr's in_schema() function. So you would use: tbl(cn, dbplyr::in_schema("mydb", "mytable")) in your example. That will create the correct reference.
Thanks @edgararuiz - this solution worked for me. Sounds like we need to open this ticket with dbplyr. The syntax we have been using above has worked for many years, so something must have changed in the defaults of dbplyr.
@Zhuk66 - could you reopen this ticket in dbplyr and tag this issue?
Sounds good, I'll go ahead and close this issue. Thank you @nviets