compute doesn't work in sparklyr 1.8.2
joscani opened this issue · comments
Hi. I found a possible problem with sparklyr 1.8.2 on Spark 2.4.5. Here is my code:
library(tidyverse)
library(sparklyr)
#' Open a Spark connection on YARN with dynamic allocation enabled.
#'
#' @param executor_instances Initial number of executor instances to request.
#' @param executor_cores Number of cores per executor.
#' @param executor_memory Memory per executor, e.g. "10G".
#' @param driver_memory Memory for the driver process, e.g. "10G".
#' @param app_name Application name shown in the YARN / Spark UI.
#' @return A `spark_connection` object.
my_init_spark <- function(executor_instances = 10,
                          executor_cores = 5,
                          executor_memory = "10G",
                          driver_memory = "10G",
                          app_name = "churn-b2c") {
  # library() fails loudly if a package is missing; require() only returns
  # FALSE, silently masking a broken environment.
  library(sparklyr)
  library(tidyverse)

  conf <- sparklyr::spark_config()
  conf$spark.sql.catalogImplementation <- "hive"  # use the Hive metastore
  conf$spark.dynamicAllocation.enabled <- "true"
  conf$spark.executor.instances <- executor_instances
  conf$spark.dynamicAllocation.minExecutors <- 1
  conf$spark.dynamicAllocation.maxExecutors <- 40
  conf$spark.executor.cores <- executor_cores
  conf$spark.executor.memory <- executor_memory
  conf$spark.driver.memory <- driver_memory
  conf$spark.memory.fraction <- 0.95  # fraction of heap for execution/storage

  # Last expression is the return value; no explicit return() needed.
  sparklyr::spark_connect(
    master = "yarn",
    version = "2.4.5",
    config = conf,
    app_name = app_name
  )
}
# Connect to the YARN cluster, copy the iris data set into Spark, then try to
# materialise the lazy table with compute() — this last call is what fails
# under sparklyr 1.8.2 (see the ParseException below).
sc <- my_init_spark(app_name = "bigb2c-1109", executor_instances = 10,
executor_cores = 6, executor_memory = "20G" )
iris_sp <- sdf_copy_to(sc, iris)
iris_cache <- iris_sp %>% compute()
The error:
Error in `db_save_query.DBIConnection()`:
! Can't save query to "dbplyr_001".
Caused by error:
! org.apache.spark.sql.catalyst.parser.ParseException:
no viable alternative at input 'CREATE OR REPLACE TEMPORARY VIEW \n"dbplyr_001"'(line 2, pos 0)
== SQL ==
CREATE OR REPLACE TEMPORARY VIEW
"dbplyr_001" AS SELECT *
^^^
FROM `iris`
at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:241)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:117)
at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:69)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:643)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at sparklyr.Invoke.invoke(invoke.scala:161)
at sparklyr.StreamHandler.handleMethodCall(stream.scala:141)
at sparklyr.StreamHandler.read(stream.scala:62)
at sparklyr.BackendHandler$$anonfun$channelRead0$1.apply$mcV$sp(handler.scala:60)
at scala.util.control.Breaks.breakable(Breaks.scala:38)
at sparklyr.BackendHandler.channelRead0(handler.scala:40)
at sparklyr.BackendHandler.channelRead0(handler.scala:14)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)
at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:328)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:302)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:352)
at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1422)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:374)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:360)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:931)
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:163)
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:700)
at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:635)
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:552)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:514)
at io.netty.util.concurrent.SingleThreadEventExecutor$6.run(SingleThreadEventExecutor.java:1044)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.lang.Thread.run(Thread.java:750)
The same code works in older sparklyr versions on the same Spark cluster.
Any idea? Thanks a lot
Even in 1.8.1 this works:
# with sparklyr 1.8.1 and spark 2.4.8
library(sparklyr)
library(tidyverse)
sc <- spark_connect(master="local")
iris_sp <- sdf_copy_to(sc, iris)
iris_cache <- iris_sp %>% compute()
iris_cache
# Source: spark<dbplyr_002> [?? x 5]
Sepal_Length Sepal_Width Petal_Length Petal_Width Species
<dbl> <dbl> <dbl> <dbl> <chr>
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
7 4.6 3.4 1.4 0.3 setosa
8 5 3.4 1.5 0.2 setosa
9 4.4 2.9 1.4 0.2 setosa
10 4.9 3.1 1.5 0.1 setosa
The same code fails with sparklyr 1.8.2:
> iris_cache <- iris_sp %>% compute()
Error in `db_save_query.DBIConnection()`:
! Can't save query to "dbplyr_001".
Caused by error:
! org.apache.spark.sql.catalyst.parser.ParseException:
no viable alternative at input 'CREATE OR REPLACE TEMPORARY VIEW \n"dbplyr_001"'(line 2, pos 0)
Minimum reprex on 1.8.2 using a "local" Spark session:
suppressPackageStartupMessages(library(sparklyr))
packageVersion("sparklyr")
#> [1] '1.8.2'
suppressPackageStartupMessages(library(dplyr))
sc <- spark_connect("local")
tbl_mtcars <- copy_to(sc, mtcars)
tbl_mtcars %>% count(am) %>% compute()
#> Error in `db_save_query.DBIConnection()`:
#> ! Can't save query to "dbplyr_001".
#> Caused by error:
#> ! org.apache.spark.sql.catalyst.parser.ParseException:
#> no viable alternative at input 'CREATE OR REPLACE TEMPORARY VIEW \n"dbplyr_001"'(line 2, pos 0)
#>
#> == SQL ==
#> CREATE OR REPLACE TEMPORARY VIEW
#> "dbplyr_001" AS SELECT `am`, COUNT(*) AS `n`
#> ^^^
#> FROM `mtcars`
#> GROUP BY `am`
#>
#> at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:266)
#> at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:133)
#> at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
#> at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:81)
#> at org.apache.spark.sql.SparkSession.$anonfun$sql$2(SparkSession.scala:604)
#> at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
#> at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:604)
#> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
#> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:601)
#> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
#> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
#> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
#> at java.lang.reflect.Method.invoke(Method.java:498)
#> at sparklyr.Invoke.invoke(invoke.scala:161)
#> at sparklyr.StreamHandler.handleMethodCall(stream.scala:141)
#> at sparklyr.StreamHandler.read(stream.scala:62)
#> at sparklyr.BackendHandler.$anonfun$channelRead0$1(handler.scala:60)
#> at scala.util.control.Breaks.breakable(Breaks.scala:42)
#> at sparklyr.BackendHandler.channelRead0(handler.scala:41)
#> at sparklyr.BackendHandler.channelRead0(handler.scala:14)
#> at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)
#> at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
#> at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
#> at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)
#> at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
#> at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
#> at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
#> at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)
#> at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:321)
#> at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:295)
#> at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
#> at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
#> at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)
#> at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410)
#> at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
#> at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
#> at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919)
#> at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:163)
#> at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:714)
#> at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:650)
#> at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:576)
#> at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)
#> at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)
#> at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
#> at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
#> at java.lang.Thread.run(Thread.java:750)
#> Backtrace:
#> ▆
#> 1. ├─tbl_mtcars %>% count(am) %>% compute()
#> 2. ├─dplyr::compute(.)
#> 3. ├─sparklyr:::compute.tbl_spark(.)
#> 4. ├─base::NextMethod()
#> 5. └─dbplyr:::compute.tbl_sql(.)
#> 6. ├─dbplyr::db_compute(...)
#> 7. └─dbplyr:::db_compute.DBIConnection(...)
#> 8. └─dbplyr:::dbplyr_save_query(con, sql, table, temporary = temporary)
#> 9. └─dbplyr:::dbplyr_fallback(con, "db_save_query", ...)
#> 10. ├─rlang::eval_bare(expr((!!fun)(con, ...)))
#> 11. └─dbplyr:::db_save_query.DBIConnection(con, ...)
#> 12. └─base::tryCatch(...)
#> 13. └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#> 14. └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#> 15. └─value[[3L]](cond)
#> 16. └─cli::cli_abort("Can't save query to {.val {name}}.", parent = cnd)
#> 17. └─rlang::abort(...)
DBI::dbListTables(sc)
#> [1] "mtcars"
spark_disconnect(sc)
Created on 2023-08-04 with reprex v2.0.2
But it does work with the dev version
suppressPackageStartupMessages(library(sparklyr))
packageVersion("sparklyr")
#> [1] '1.8.2.9000'
suppressPackageStartupMessages(library(dplyr))
sc <- spark_connect("local")
tbl_mtcars <- copy_to(sc, mtcars)
tbl_mtcars %>% count(am) %>% compute()
#> # Source: spark<dbplyr_001> [?? x 2]
#> am n
#> <dbl> <dbl>
#> 1 1 13
#> 2 0 19
DBI::dbListTables(sc)
#> [1] "dbplyr_001" "mtcars"
spark_disconnect(sc)
Created on 2023-08-04 with reprex v2.0.2
@joscani - would you mind installing the DEV version and trying it again? I was able to confirm that it works with a "local" Spark session, but I see that you need it to work in YARN, so it would be good to make sure
remotes::install_github("sparklyr/sparklyr")
Thanks @edgararuiz .
I'll try next Monday.
It works. Thanks a lot @edgararuiz