This project includes the dataset and the script needed to load the source data into a PostgreSQL table. Connect to your PostgreSQL database with your favourite database manager, using "postgres" as both the username and the password.
# Enter the project directory before running the remaining commands.
cd kafka-ksqldb-docker
# Start the Docker Compose services in the background (-d = detached).
sudo docker compose up -d
# Run the loader script that puts the source data into a PostgreSQL table.
python3 load_postgres.py
# Start the interactive ksql CLI inside the container named "ksqldb";
# the SQL statements that follow are entered at its prompt.
sudo docker exec -it ksqldb ksql
-- Register a Kafka Connect JDBC source connector that copies the PostgreSQL
-- table "application_record" into Kafka. The topic name is the prefix plus
-- the table name, i.e. "jdbc_application_record".
CREATE SOURCE CONNECTOR jdbc_connector WITH (
    'connector.class'     = 'io.confluent.connect.jdbc.JdbcSourceConnector',

    -- Where to read from: host "postgresql", port 5432, database "postgres".
    'connection.url'      = 'jdbc:postgresql://postgresql:5432/postgres',
    'connection.user'     = 'postgres',
    'connection.password' = 'postgres',

    -- What to copy and where to publish it.
    'topic.prefix'        = 'jdbc_',
    'table.whitelist'     = 'application_record',

    -- Bulk mode loads the whole table on every poll rather than tracking
    -- new rows, so the topic receives repeated copies of the same records.
    -- NOTE(review): confirm this is intended before using outside a demo.
    'mode'                = 'bulk'
);
-- Declare a keyless stream over the topic written by the JDBC connector.
-- The column list is presumably a one-to-one copy of the columns of the
-- application_record table -- verify against load_postgres.py.
CREATE STREAM stream_table (
    ID                  INT,
    CODE_GENDER         VARCHAR,
    FLAG_OWN_CAR        VARCHAR,
    FLAG_OWN_REALTY     VARCHAR,
    CNT_CHILDREN        INT,
    AMT_INCOME_TOTAL    DOUBLE,
    NAME_INCOME_TYPE    VARCHAR,
    NAME_EDUCATION_TYPE VARCHAR,
    NAME_FAMILY_STATUS  VARCHAR,
    NAME_HOUSING_TYPE   VARCHAR,
    DAYS_BIRTH          INT,
    DAYS_EMPLOYED       INT,
    FLAG_MOBIL          INT,
    FLAG_WORK_PHONE     INT,
    FLAG_PHONE          INT,
    FLAG_EMAIL          INT,
    OCCUPATION_TYPE     VARCHAR,
    CNT_FAM_MEMBERS     INT
) WITH (
    -- Must match 'topic.prefix' + table name used by the source connector.
    KAFKA_TOPIC = 'jdbc_application_record',
    FORMAT      = 'json',
    PARTITIONS  = 1
);
-- Push query: prints each record of stream_table, preceded by its ROWTIME,
-- as it arrives. It keeps running until cancelled.
SELECT
    ROWTIME,
    *
FROM stream_table
EMIT CHANGES;
-- Materialise a table with one row per distinct combination of the
-- GROUP BY columns, carrying the largest AMT_INCOME_TOTAL seen for that
-- combination. Source columns are renamed to friendlier names.
CREATE TABLE final_table AS
    SELECT
        ID                    AS client_id,
        CODE_GENDER           AS gender,
        MAX(AMT_INCOME_TOTAL) AS annual_income,
        NAME_INCOME_TYPE      AS income_source,
        NAME_EDUCATION_TYPE   AS education_level,
        NAME_FAMILY_STATUS    AS marriage_status
    FROM stream_table
    GROUP BY
        ID,
        CODE_GENDER,
        NAME_INCOME_TYPE,
        NAME_EDUCATION_TYPE,
        NAME_FAMILY_STATUS
    EMIT CHANGES;
-- Pull query: returns the current contents of final_table and then ends
-- (there is no EMIT CHANGES clause).
SELECT *
FROM final_table;
Finally, open http://localhost:9021 in your browser.