Prepared Statements Incorrectly Storing Partition Information
proddata opened this issue · comments
CrateDB version
5.6.4
CrateDB setup information
any (single, multi, cloud)
Problem description
On a long-lived connection using psycopg 3, queries start returning instantly with 0 affected rows instead of actually doing their work
Steps to Reproduce
-- Source table: partitioned on ts_g, a generated column that truncates ts to the minute.
CREATE TABLE scheduler_issue.base (
ts TIMESTAMP,
ts_g generated always as date_trunc('minute',ts)
) PARTITIONED BY (ts_g);
-- Plain (non-partitioned) table that stmt2 in the script below inserts into.
CREATE TABLE scheduler_issue.target (
ts TIMESTAMP
);
import asyncio
from datetime import datetime
import psycopg
# stmt1: fill scheduler_issue.base with a generated series of timestamps from
# the start of the current minute to the start of the next one, using a step
# argument of 5 (the sample output in this report shows 12001 rows per run).
stmt1 = """
INSERT INTO
scheduler_issue.base (ts)
SELECT
ts
FROM
generate_series(
date_trunc('minute', now()),
date_trunc('minute', now() + '1 minute'::INTERVAL),
5
) g (ts);"""
# stmt2: copy up to 100 rows from base into target; base is LEFT JOINed with
# itself on ts. This is the statement whose row count drops to 0 in the output.
stmt2 = """INSERT INTO scheduler_issue.target (ts)
SELECT a.ts FROM scheduler_issue.base a
LEFT JOIN scheduler_issue.base b USING (ts)
LIMIT 100;"""
# stmt3: delete rows from base whose partition column ts_g is more than
# three minutes in the past.
stmt3 = """ DELETE FROM scheduler_issue.base
WHERE ts_g < now() - '3 minute'::INTERVAL;"""
async def run():
    """Run the three reproduction statements once a minute on one connection.

    Opens a single psycopg async connection to localhost (user and database
    ``crate``, empty password) and then loops indefinitely. Each pass executes
    ``stmt1``, ``stmt2`` and ``stmt3`` in that order on the same cursor,
    printing the current time and ``cur.rowcount`` after each statement, and
    then sleeps for 60 seconds. Any ``Exception`` is printed instead of being
    propagated, and the connection is closed on the way out.
    """
    # Replace these with your database connection details
    connection = await psycopg.AsyncConnection.connect(
        dbname='crate', user='crate', password='', host='localhost')
    print("Connected to the database.")
    try:
        # The same connection and cursor are reused for every iteration, so
        # all executions of a statement happen on one long-lived session.
        async with connection, connection.cursor() as cur:
            while True:
                print(f"Executing at {datetime.now()}")
                # Insert into base, copy into target, then delete old rows.
                for statement in (stmt1, stmt2, stmt3):
                    await cur.execute(statement)
                    print(f"Time: {datetime.now()}, Affected Rows: {cur.rowcount}")
                # Wait for one minute before the next iteration
                await asyncio.sleep(60)
    except Exception as error:
        print("An error occurred:", error)
    finally:
        # Ensure the connection is closed properly
        if not connection.closed:
            await connection.close()
        print("Connection closed.")
# Script entry point: asyncio.run() drives run(), which keeps looping until
# an exception interrupts it.
asyncio.run(run())
output:
Executing at 2024-04-22 15:10:20.172728
Time: 2024-04-22 15:10:20.784048, Affected Rows: 12001
Time: 2024-04-22 15:10:20.807172, Affected Rows: 0 <-- table is not refreshed
Time: 2024-04-22 15:10:20.824693, Affected Rows: 0
Executing at 2024-04-22 15:11:20.827274
Time: 2024-04-22 15:11:21.154763, Affected Rows: 12001
Time: 2024-04-22 15:11:24.159505, Affected Rows: 100 <-- table is refreshed, insert successful
Time: 2024-04-22 15:11:24.164031, Affected Rows: 0
Executing at 2024-04-22 15:12:24.165954
Time: 2024-04-22 15:12:24.377397, Affected Rows: 12001
Time: 2024-04-22 15:12:30.074639, Affected Rows: 100
Time: 2024-04-22 15:12:30.078405, Affected Rows: 0
Executing at 2024-04-22 15:13:30.080687
Time: 2024-04-22 15:13:30.279791, Affected Rows: 12001
Time: 2024-04-22 15:13:38.045847, Affected Rows: 100
Time: 2024-04-22 15:13:38.071137, Affected Rows: -1
Executing at 2024-04-22 15:14:38.072112
Time: 2024-04-22 15:14:38.225661, Affected Rows: 12001
Time: 2024-04-22 15:14:45.784156, Affected Rows: 100
Time: 2024-04-22 15:14:45.805235, Affected Rows: -1
Executing at 2024-04-22 15:15:45.806786
Time: 2024-04-22 15:15:46.161335, Affected Rows: 12001
Time: 2024-04-22 15:15:53.979985, Affected Rows: 100
Time: 2024-04-22 15:15:54.003209, Affected Rows: -1
Executing at 2024-04-22 15:16:54.004968
Time: 2024-04-22 15:16:54.183757, Affected Rows: 12001
Time: 2024-04-22 15:16:54.199846, Affected Rows: 0 <-- fails within ms
Time: 2024-04-22 15:16:54.201445, Affected Rows: -1
Executing at 2024-04-22 15:17:54.203813
Time: 2024-04-22 15:17:54.444582, Affected Rows: 12001
Time: 2024-04-22 15:17:54.448275, Affected Rows: 0 <-- fails within ms
Time: 2024-04-22 15:17:54.449818, Affected Rows: -1
Executing at 2024-04-22 15:18:54.451699
Time: 2024-04-22 15:18:54.583264, Affected Rows: 12001
Time: 2024-04-22 15:18:54.587881, Affected Rows: 0 <-- fails within ms
Time: 2024-04-22 15:18:54.589297, Affected Rows: -1
Actual Result
Query executes successfully instantly, 0 records inserted
Expected Result
Query executes successfully after a few seconds, 100 records inserted
What is suspicious:
- the first 6 executions work fine
- the 7th and following executions partially fail
Looking at a Wireshark recording, there seems to be a sudden change in how psycopg (3) communicates with CrateDB (from second 349 onwards; all logs before that look the same):
Wireshark
![image](https://github.com/crate/crate/assets/23557193/e6dafdc0-6a06-4879-82d3-eab55509a683)
For the first executions, the inserts are sent as simple queries, i.e.:
<Z (ready for query)
>Q (simple query)
<C (command completion INSERT 0 100)
<Z (ready for query)
Forcing prepared statements with execute(stmt1, prepare=True) (and likewise for the other statements)
makes it fail sooner:
await cur.execute(stmt1, prepare=True)
result = cur.rowcount
print(f"Time: {datetime.now()}, Affected Rows: {result}")
# Second SQL statement
await cur.execute(stmt2 , prepare=True)
result = cur.rowcount
print(f"Time: {datetime.now()}, Affected Rows: {result}")
# Third SQL statement
await cur.execute(stmt3, prepare=True)
result = cur.rowcount
print(f"Time: {datetime.now()}, Affected Rows: {result}")
Executing at 2024-04-22 21:04:22.818460
Time: 2024-04-22 21:04:22.905894, Affected Rows: 12001
Time: 2024-04-22 21:04:30.486069, Affected Rows: 100
Time: 2024-04-22 21:04:30.500197, Affected Rows: -1
Executing at 2024-04-22 21:05:30.502028
Time: 2024-04-22 21:05:30.601565, Affected Rows: 12001
Time: 2024-04-22 21:05:30.611416, Affected Rows: 0 <-- fails already
Time: 2024-04-22 21:05:30.612167, Affected Rows: -1
Executing at 2024-04-22 21:06:30.613892
Time: 2024-04-22 21:06:30.737932, Affected Rows: 12001
Time: 2024-04-22 21:06:30.740307, Affected Rows: 0
Time: 2024-04-22 21:06:30.741570, Affected Rows: -1
Executing at 2024-04-22 21:07:30.743420
Time: 2024-04-22 21:07:30.854913, Affected Rows: 12001
Time: 2024-04-22 21:07:30.856737, Affected Rows: 0
Time: 2024-04-22 21:07:30.857663, Affected Rows: -1
So this seems to relate to prepared statements and how CrateDB treats them.
With default settings, psycopg 3 automatically prepares a statement once it has been run more than 5 times on a connection, unless this is explicitly disabled. These prepared statements seem to behave strangely when used with CrateDB. It almost seems as if CrateDB keeps state about which partitions are affected by a prepared statement, but never updates it. This can be replicated with the following code example, which does roughly the following:
- Create a partitioned table
- Prepare and run DELETE statement -> nothing gets deleted as the table is empty
- Fill table with records
- Refresh table just to be sure
- Run same DELETE statement again -> nothing gets deleted, but the table is not empty 🤯
- Let's wait 15 seconds and try again ...
- Run same DELETE statement again -> nothing gets deleted, but the table is not empty 🤯
Code Example
import asyncio
from datetime import datetime
import psycopg
# SQL statements for database operations
# Partitioned table: ts_g is generated by truncating ts to the minute and is
# the partition column.
CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS scheduler_issue.base (
ts TIMESTAMP,
ts_g GENERATED ALWAYS AS date_trunc('minute', ts)
) PARTITIONED BY (ts_g);
"""
# One timestamp every 5 seconds from the start of the current minute to the
# start of the next one (13 rows in the sample output of this report).
INSERT_DATA = """
INSERT INTO scheduler_issue.base (ts)
SELECT ts
FROM generate_series(
date_trunc('minute', now()),
date_trunc('minute', now() + '1 minute'::INTERVAL),
'5 seconds'::INTERVAL
) g (ts);
"""
# NOTE: despite the name there is no WHERE clause, so this targets every row
# in the table.
DELETE_OLD_DATA = """
DELETE FROM scheduler_issue.base
"""
# Explicit refresh, issued between the insert and the next delete.
REFRESH_TABLE = "REFRESH TABLE scheduler_issue.base;"
async def run():
    """Reproduce the prepared DELETE that keeps reporting 0 affected rows.

    On a single connection and cursor this runs, in order: the table
    creation, a prepared ``DELETE_OLD_DATA`` (table still empty), the insert,
    an explicit refresh, the same prepared delete again, a 15 second sleep,
    and the prepared delete a third time. ``cur.rowcount`` is printed after
    each delete and after the insert. Any ``Exception`` is printed instead of
    being propagated, and the connection is closed on the way out.
    """
    # Connect to the database asynchronously (adjust the details as needed)
    connection = await psycopg.AsyncConnection.connect(
        dbname='crate', user='crate', password='', host='localhost')
    print("Connected to the database.")
    try:
        # Connection and cursor are shared by every statement below.
        async with connection, connection.cursor() as cur:
            await cur.execute(CREATE_TABLE)
            print("Table created")
            #await asyncio.sleep(30)

            # First prepared delete: the table is still empty at this point.
            await cur.execute(DELETE_OLD_DATA, prepare=True)
            print(f"Run prepared delete first time on empty table, Affected Rows: {cur.rowcount}")

            await cur.execute(INSERT_DATA)
            print(f"Data inserted at, Affected Rows: {cur.rowcount}")

            await cur.execute(REFRESH_TABLE)
            print("Table refreshed")

            # Same prepared delete, now that the table has rows.
            await cur.execute(DELETE_OLD_DATA, prepare=True)
            print(f"Run prepared delete again on filled table, Affected Rows: {cur.rowcount}")

            print("Sleeping for 15 seconds")
            await asyncio.sleep(15)

            # And once more after the pause.
            await cur.execute(DELETE_OLD_DATA, prepare=True)
            print(f"Run prepared delete again on filled table, Affected Rows: {cur.rowcount}")
    except Exception as error:
        print(f"An error occurred: {error}")
    finally:
        # Close the connection
        if not connection.closed:
            await connection.close()
        print("Connection closed.")
# Script entry point: asyncio.run() executes the run() coroutine to completion.
asyncio.run(run())
Output ❌
Connected to the database.
Table created
Run prepared delete first time on empty table, Affected Rows: 0
Data inserted at, Affected Rows: 13
Table refreshed
Run prepared delete again on filled table, Affected Rows: 0
Sleeping for 15 seconds
Run prepared delete again on filled table, Affected Rows: 0
Connection closed.
With a non-partitioned table it works ✅
Connected to the database.
Table created
Run prepared delete first time on empty table, Affected Rows: 0
Data inserted at, Affected Rows: 13
Table refreshed
Run prepared delete again on filled table, Affected Rows: 13
Sleeping for 15 seconds
Run prepared delete again on filled table, Affected Rows: 0
Connection closed.