ProBrian / gandiva_rust_udf

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Gandiva Rust UDF

This project is a collection of UDFs written in Rust for Apache Arrow Gandiva.

How to write a new UDF

  • create a new function package under the repo root
cargo new my_func --lib
  • set the crate type to cdylib and add my_func as a workspace member in gandiva_rust_udf/Cargo.toml
# in gandiva_rust_udf/Cargo.toml
...
[lib]
crate-type = ["cdylib"]

[workspace]
members = [
    "udf_registry",
    "my_func",
]
resolver = "2"

[workspace.dependencies]
libc = "0.2.152"
gandiva_rust_udf_macro = { version = "0.1.4" }
gandiva_rust_udf_shared = { version = "0.1.4" }
  • go to gandiva_rust_udf/my_func/Cargo.toml and add gandiva_rust_udf_macro and gandiva_rust_udf_shared as dependencies
# in gandiva_rust_udf/my_func/Cargo.toml
[package]
name = "my_func"
version = "0.0.1"
edition = "2021"

[lib]
name = "my_func"
path = "src/lib.rs"

[dependencies]
# if your function takes string parameters or returns a string, add libc as a dependency
libc = { workspace = true }
gandiva_rust_udf_macro = { workspace = true }
gandiva_rust_udf_shared = { workspace = true }
  • write the function in gandiva_rust_udf/my_func/src/lib.rs, for example:
use gandiva_rust_udf_macro::udf;

// NOTE(review): `#[udf]` is imported from gandiva_rust_udf_macro; presumably it generates the
// Gandiva-facing wrapper for this function — confirm against the macro's documentation.
/// Example UDF: returns `x` incremented by one.
#[udf]
fn add_one(x: i64) -> i64 {
    x + 1
}

// Unit tests that call the UDF directly as a plain Rust function.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_add_one() {
        // 6 + 1 must equal 7.
        let result = add_one(6);
        assert_eq!(result, 7);
    }
}
  • install gandiva_rust_udf_build, which is used to generate the UDF registry
cargo install gandiva_rust_udf_build
  • go back to the gandiva_rust_udf root directory and generate the UDF registry
gen-udf-reg
  • build gandiva_rust_udf to produce the library file libgdv_rust_udf_cus.dylib (macOS) or libgdv_rust_udf_cus.so (Linux)
cargo build --lib --release --workspace
  • copy the library file to the Stonewave scalar functions directory
${STONEWAVE_HOME}/var/scalar_funcs

How to create a different project like this

  • create a new cargo package with its own name (for example gandiva_rust_udf_cus)
cargo new gandiva_rust_udf_cus --lib

Function List

signature
utf8 bar(int64, int64, int64, int64)
utf8 conv(utf8, int64, int64)
utf8 format(utf8, utf8)
utf8 format(utf8, utf8, utf8)
utf8 format(utf8, utf8, utf8, utf8)
utf8 format(utf8, utf8, utf8, utf8, utf8)
utf8 format(utf8, utf8, utf8, utf8, utf8, utf8)
bool is_ipv4(utf8)
bool is_ipv6(utf8)
bool is_ipv4_loopback(utf8)
bool is_ipv6_loopback(utf8)
bool ipv4_to_ipv6(utf8)
bool is_ascii(utf8)
int64 gcd(int64, int64)
int64 lcm(int64, int64)
float64 jaro_similarity(utf8, utf8)
float64 jaro_winkler_similarity(utf8, utf8)
int64 damerau_levenshtein_distance(utf8, utf8)
utf8 hamming_distance(utf8, utf8)
float64 normalized_damerau_levenshtein_distance(utf8, utf8)
float64 normalized_levenshtein_distance(utf8, utf8)
int64 osa_distance(utf8, utf8)
float64 sorensen_dice_similarity(utf8, utf8)
utf8 protocol(utf8)
utf8 domain(utf8)
utf8 domain_without_www(utf8)
utf8 top_level_domain(utf8)
utf8 port(utf8)
utf8 path(utf8)
utf8 path_full(utf8)
utf8 query_string(utf8)
utf8 fragment(utf8)
utf8 netloc_username(utf8)
utf8 netloc_password(utf8)
utf8 netloc(utf8)
bool is_valid_url(utf8)
utf8 cut_www(utf8)
utf8 cut_query_string(utf8)
utf8 cut_query_string_and_fragment(utf8)
utf8 uuid()
bool valid_json(utf8)

About

License:Apache License 2.0


Languages

Rust 96.8% · Shell 2.6% · Just 0.6%