xmarcosx / azure-sandbox

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Azure Sandbox

sudo su;
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -;
curl https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list > /etc/apt/sources.list.d/mssql-release.list;

sudo apt update;
sudo apt install msodbcsql17;
CREATE EXTERNAL DATA SOURCE dbt WITH
(
	TYPE=HADOOP,
	LOCATION='abfs://dagster.dfs.core.windows.net/dagster/dagster/edfi_api/'
)

CREATE EXTERNAL FILE FORMAT json_format
WITH
(  
    FORMAT_TYPE=DELIMITEDTEXT,
    FORMAT_OPTIONS (
        FIELD_TERMINATOR='0x0b'
    )
)

SET ANSI_NULLS ON;
SET QUOTED_IDENTIFIER ON;

CREATE EXTERNAL TABLE analytics.edfi_grading_periods
( 
   [is_complete_extract] [varchar],
   [id] [varchar]
)
WITH (
	DATA_SOURCE = dbt,
	LOCATION = N'base_edfi_grading_periods/school_year=*/date_extracted=*/extract_type=*/*.json',
	FILE_FORMAT = json_format);
dbt run-operation stage_external_sources;
SELECT
    JSON_VALUE(jsonContent, '$.studentUniqueId')    AS student_unique_id,
    JSON_VALUE(jsonContent, '$.lastSurname')        AS last_surname,
    JSON_VALUE(jsonContent, '$.firstName')          AS first_name
FROM
    OPENROWSET(
        BULK 'https://dagster.dfs.core.windows.net/dagster/dagster/edfi_api/base_edfi_grading_periods/school_year=*/date_extracted=*/extract_type=*/*.json',
        FORMAT = 'CSV',
        FIELDQUOTE = '0x0b',
        FIELDTERMINATOR ='0x0b',
        ROWTERMINATOR = '0x0b'
    )
    WITH (
        jsonContent varchar(MAX)
    ) AS edfi
WHERE
    edfi.filepath(1) = 2022
    AND edfi.filepath(3) = 'records'

About


Languages

Language:Python 100.0%