prestodb / RPresto

DBI-based adapter for Presto for the statistical programming language R.

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

When used with `dbplyr` v2.4.0, nested CTEs result in nested WITH statements

jarodmeng opened this issue · comments

# Reproduction script: chained compute(cte = TRUE) calls under dbplyr 2.4.0.
# Lines starting with `#>` are the console output captured by reprex.
library(RPresto)
library(DBI)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Record the package versions this run was made with.
packageVersion("RPresto")
#> [1] '1.4.6'
packageVersion("dbplyr")
#> [1] '2.4.0'
packageVersion("DBI")
#> [1] '1.1.3'

# Connect to a Presto server at localhost:8080, using the "memory" catalog
# and the "default" schema.
con <- DBI::dbConnect(
  drv = RPresto::Presto(),
  host = "http://localhost",
  port = 8080,
  user = Sys.getenv("USER"),
  catalog = "memory",
  schema = "default"
)

# Remove any existing "mtcars" table before copying the data frame over.
if (DBI::dbExistsTable(con, "mtcars")) {
  DBI::dbRemoveTable(con, "mtcars")
}
tbl.mtcars <- dplyr::copy_to(dest = con, df = mtcars, name = "mtcars")
# First CTE, named "avg": mean wt per cyl.
tbl.mtcars.avg <- tbl.mtcars %>%
  group_by(cyl) %>%
  summarize(wt_avg = mean(wt, na.rm = TRUE)) %>%
  compute(name = "avg", cte = TRUE)
# Second CTE, named "join": built on top of the "avg" CTE.
tbl.mtcars.join <- tbl.mtcars %>%
  left_join(tbl.mtcars.avg, by = "cyl") %>%
  compute(name = "join", cte = TRUE)
# Render the SQL. In the output below, the "avg" definition appears twice:
# once at the top level and again in a WITH clause nested inside "join".
tbl.mtcars.join %>%
  show_query()
#> <SQL>
#> WITH "avg" AS (
#> SELECT "cyl", AVG("wt") AS "wt_avg"
#> FROM "mtcars"
#> GROUP BY "cyl"
#> ),
#> "join" AS (
#> WITH "avg" AS (
#> SELECT "cyl", AVG("wt") AS "wt_avg"
#> FROM "mtcars"
#> GROUP BY "cyl"
#> )
#> SELECT "mtcars".*, "wt_avg"
#> FROM "mtcars"
#> LEFT JOIN "avg"
#>   ON ("mtcars"."cyl" = "avg"."cyl")
#> )
#> SELECT *
#> FROM "join"

Created on 2023-11-06 with reprex v2.0.2

This is caused by `dbplyr::db_sql_render()` not passing its `...` arguments through (see tidyverse/dbplyr#1394).

Confirmed that 1.4.6.9000 fixes the problem.

# Verification script: chained compute(cte = TRUE) calls, run with
# RPresto 1.4.6.9000 and dbplyr 2.4.0.
# Lines starting with `#>` are the console output captured by reprex.
library(RPresto)
library(DBI)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Record the package versions this run was made with.
packageVersion("RPresto")
#> [1] '1.4.6.9000'
packageVersion("dbplyr")
#> [1] '2.4.0'
packageVersion("DBI")
#> [1] '1.1.3'

# Connect to a Presto server at localhost:8080, using the "memory" catalog
# and the "default" schema.
con <- DBI::dbConnect(
  drv = RPresto::Presto(),
  host = "http://localhost",
  port = 8080,
  user = Sys.getenv("USER"),
  catalog = "memory",
  schema = "default"
)

# Remove any existing "mtcars" table before copying the data frame over.
if (DBI::dbExistsTable(con, "mtcars")) {
  DBI::dbRemoveTable(con, "mtcars")
}
tbl.mtcars <- dplyr::copy_to(dest = con, df = mtcars, name = "mtcars")
# First CTE, named "avg": mean wt per cyl.
tbl.mtcars.avg <- tbl.mtcars %>%
  group_by(cyl) %>%
  summarize(wt_avg = mean(wt, na.rm = TRUE)) %>%
  compute(name = "avg", cte = TRUE)
# Second CTE, named "join": built on top of the "avg" CTE.
tbl.mtcars.join <- tbl.mtcars %>%
  left_join(tbl.mtcars.avg, by = "cyl") %>%
  compute(name = "join", cte = TRUE)
# Render the SQL. In the output below, "avg" is defined once at the top
# level and "join" refers to it by name; there is no nested WITH clause.
tbl.mtcars.join %>%
  show_query()
#> <SQL>
#> WITH "avg" AS (
#> SELECT "cyl", AVG("wt") AS "wt_avg"
#> FROM "mtcars"
#> GROUP BY "cyl"
#> ),
#> "join" AS (
#> SELECT "mtcars".*, "wt_avg"
#> FROM "mtcars"
#> LEFT JOIN "avg"
#>   ON ("mtcars"."cyl" = "avg"."cyl")
#> )
#> SELECT *
#> FROM "join"

Created on 2023-11-06 with reprex v2.0.2