---
title: "UDFs"
output: github_document
vignette: |
  %\VignetteIndexEntry{UDFs}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

See the [Login page](Login.md) for login information.

## Generic UDFs

These are functions you write, invoked server-side.

```
# Namespace to charge
namespace <- "demo"

# Your function
myfunc <- function(x=50:54, y=70:74) {
  x + y
}

# Execute the function on TileDB Cloud
result <- tiledbcloud::execute_generic_udf(udf=myfunc, namespace=namespace)

print(result)
```

```
[1] 120 122 124 126 128
```

Using arguments:

```
namespace <- "demo"

myfunc <- function(x, y) {
  x + y
}

myargs <- list(100:104, 200:204)
result <- tiledbcloud::execute_generic_udf(udf=myfunc, args=myargs, namespace=namespace)
print(result)
```

```
[1] 300 302 304 306 308
```

## Single-array UDFs

```
myfunc <- function(df) {
  vec <- as.vector(df[["a"]])
  list(min=min(vec), med=median(vec), max=max(vec))
}

result <- tiledbcloud::execute_array_udf(
  array="tiledb://TileDB-Inc/quickstart_dense",
  udf=myfunc,
  selectedRanges=list(cbind(1,2), cbind(1,2)), # Upper-left 2x2
  attrs=c("a"),
  namespace="demo" # to charge
)
print(result)
```

```
$min
[1] 1

$med
[1] 3.5

$max
[1] 6

```

```
# Run a linear regression on bill length vs body mass
myfunc <- function(df) {
  vec1 <- as.vector(df$bill_length_mm)
  vec2 <- as.vector(df$body_mass_g)
  lm.fit(cbind(1, vec2), vec1)$coefficients
}

result <- tiledbcloud::execute_array_udf(
  array="tiledb://demo/palmer_penguins2",
  udf=myfunc,
  selectedRanges=list(cbind("A", "Z"), cbind(2007,2009)),
  attrs=list("bill_length_mm", "body_mass_g")
  # NULL namespace means username namespace is charged
)
print(result)
```

```
                  vec2
27.15072200 0.00400329
```

## Multi-array UDFs

```
myfunc <- function(df1, df2) {
  vec1 <- as.vector(df1[["a"]])
  vec2 <- as.vector(df2[["a"]])
  list(
    len=length(vec1) + length(vec2),
    min=min(vec1) + min(vec2),
    med=median(vec1) + median(vec2),
    max=max(vec1) + max(vec2)
  )
}

details1 <- tiledbcloud::UDFArrayDetails$new(
  uri="tiledb://TileDB-Inc/quickstart_dense",
  ranges=tiledbcloud::QueryRanges$new(
    layout=tiledbcloud::Layout$new('row-major'),
    ranges=list(cbind(1,4),cbind(1,4))
  ),
  buffers=list("a")
)

details2 <- tiledbcloud::UDFArrayDetails$new(
  uri="tiledb://TileDB-Inc/quickstart_sparse",
  ranges=tiledbcloud::QueryRanges$new(
    layout=tiledbcloud::Layout$new('row-major'),
    ranges=list(cbind(1,2),cbind(1,4))
  ),
  buffers=list("a")
)

result <- tiledbcloud::execute_multi_array_udf(
  array_list=list(details1, details2),
  udf=myfunc,
  namespace="demo"
)
print(result)
```

```
$len
[1] 19

$min
[1] 2

$med
[1] 10.5

$max
[1] 19

```

## UDF registration

Known issue: Tags can be successfully set using the functions shown below but are not read back on `get_udf_info`. This is a server-side bug, affecting UDF-info readback regardless of client (Python or R).

## Register a UDF and read back UDF info

### Generic UDFs

```
library(tiledbcloud)

namespace <- 'testuser'
udfname <- 'udf-registration-test-generic'

myfunc <- function(vec, exponent) {
  sum(vec ** exponent)
}

register_udf(namespace=namespace, name=udfname, type='generic', func=myfunc)

info <- get_udf_info(namespace=namespace, name=udfname)
str(info)
```

```
$ ./cloud-register-generic-udf.r
List of 5
 $ name    : chr "udf-registration-test-generic"
 $ exec    : chr "Wzg4LDEwLDAsMCwwLDMsMCw0LDEsMSwwLDMsNSwwLDAsMCwwLDUsODUsODQsNzAsNDUsNTYsMCwwLDQsMywwLDAsMCwyNTMsMCwwLDQsMiwwLDA"| __truncated__
 $ exec_raw: chr "function (vec, exponent) \n{\n sum(vec ** exponent)\n}"
 $ language: chr "\"r\""
 $ tags    : NULL
```

### Single-array UDFs

```
library(tiledbcloud)

namespace <- "testuser"
udfname <- "udf-registration-test-single-array-with-arg"

myfunc <- function(df, attrname, exponent) {
  vec <- as.vector(df[[attrname]])
  sum(vec ** exponent)
}

tiledbcloud::register_udf(namespace=namespace, name=udfname, type='single_array', func=myfunc)

info <- tiledbcloud::get_udf_info(namespace=namespace, name=udfname)
str(info)
```

```
List of 5
 $ name    : chr "udf-registration-test-single-array-with-arg"
 $ exec    : chr "Wzg4LDEwLDAsMCwwLDMsMCw0LDEsMSwwLDMsNSwwLDAsMCwwLDUsODUsODQsNzAsNDUsNTYsMCwwLDQsMywwLDAsMCwyNTMsMCwwLDQsMiwwLDA"| __truncated__
 $ exec_raw: chr "function (df, attrname, exponent) \n{\n vec <- as.vector(df[[attrname]])\n sum(vec ** exponent)\n}"
 $ language: chr "\"r\""
 $ tags    : list()
```

### Multi-array UDFs

```
library(tiledbcloud)

namespace <- "testuser"
udfname <- "udf-registration-test-multi-array-with-arg"

myfunc <- function(df1, df2, attrname) {
  vec1 <- as.vector(df1[[attrname]])
  vec2 <- as.vector(df2[[attrname]])
  list(
    len=length(vec1) + length(vec2),
    min=min(vec1) + min(vec2),
    med=median(vec1) + median(vec2),
    max=max(vec1) + max(vec2)
  )
}

tiledbcloud::register_udf(namespace=namespace, name=udfname, type='multi_array', func=myfunc)

info <- tiledbcloud::get_udf_info(namespace=namespace, name=udfname)
str(info)
```

```
List of 5
 $ name    : chr "udf-registration-test-multi-array-with-arg"
 $ exec    : chr "Wzg4LDEwLDAsMCwwLDMsMCw0LDEsMSwwLDMsNSwwLDAsMCwwLDUsODUsODQsNzAsNDUsNTYsMCwwLDQsMywwLDAsMCwyNTMsMCwwLDQsMiwwLDA"| __truncated__
 $ exec_raw: chr "function (df1, df2, attrname) \n{\n vec1 <- as.vector(df1[[attrname]])\n vec2 <- as.vector(df2[[attrname]"| __truncated__
 $ language: chr "\"r\""
 $ tags    : list()
```

## Invoke registered UDF

### Generic UDFs

```
library(tiledbcloud)

namespace <- 'testuser'
udfname <- 'udf-registration-test-generic'
registered_udf_name <- paste(namespace, udfname, sep='/')

result <- execute_generic_udf(
  registered_udf_name=registered_udf_name,
  args=list(vec=1:10, exponent=3),
  namespace=namespace
)
print(result)
```

```
$ ./cloud-execute-registered-generic-udf.r
[1] 3025
```

### Single-array UDFs

```
library(tiledbcloud)

namespace_to_charge <- 'testuser'
udf_name <- 'udf-registration-test-single-array-with-arg'
registered_udf_name <- paste(namespace_to_charge, udf_name, sep='/')

result <- tiledbcloud::execute_array_udf(
  array="TileDB-Inc/quickstart_dense",
  registered_udf_name=registered_udf_name,
  args=list(attrname="a",
    exponent=2),
  selectedRanges=list(cbind(1,2), cbind(1,2)),
  attrs=c("a"),
  namespace="testuser" # namespace to charge
)
print(result)
```

```
[1] 66
```

### Multi-array UDFs

```
#!/usr/bin/env Rscript

library(tiledbcloud)

namespace_to_charge <- 'testuser'
registered_udf_name <- 'testuser/udf-registration-test-multi-array-with-arg'

details1 <- tiledbcloud::UDFArrayDetails$new(
  uri="tiledb://TileDB-Inc/quickstart_dense",
  ranges=QueryRanges$new(
    layout=Layout$new('row-major'),
    ranges=list(cbind(1,4),cbind(1,4))
  ),
  buffers=list("a")
)

details2 <- tiledbcloud::UDFArrayDetails$new(
  uri="tiledb://TileDB-Inc/quickstart_sparse",
  ranges=QueryRanges$new(
    layout=Layout$new('row-major'),
    ranges=list(cbind(1,2),cbind(1,4))
  ),
  buffers=list("a")
)

result <- tiledbcloud::execute_multi_array_udf(
  array_list=list(details1, details2),
  registered_udf_name=registered_udf_name,
  args=list(attrname="a"),
  namespace=namespace_to_charge
)
print(result)
```

```
$len
[1] 19

$min
[1] 2

$med
[1] 10.5

$max
[1] 19

```

## Unregister UDF

This is the same for `generic`, `single_array`, and `multi_array` registered UDFs.

```
library(tiledbcloud)

namespace <- 'testuser'
udfname <- 'udf-registration-test-generic'

deregister_udf(namespace=namespace, name=udfname)
```