UDFs

See the Login page for login information.

Generic UDFs

Generic UDFs are functions that you write locally and that TileDB Cloud executes server-side.

# The namespace that is charged for running the UDF.
namespace <- "demo"

# The UDF: elementwise sum of two vectors. Both parameters carry defaults,
# so the function can be executed without supplying any arguments.
myfunc <- function(x = 50:54, y = 70:74) {
  x + y
}

# Send the function to TileDB Cloud for execution and show the result.
result <- tiledbcloud::execute_generic_udf(
  udf = myfunc,
  namespace = namespace
)
print(result)
[1] 120 122 124 126 128

Using arguments:

# The namespace that is charged for running the UDF.
namespace <- "demo"

# This variant has no defaults, so the caller must supply both operands.
myfunc <- function(x, y) {
  x + y
}

# Arguments for the UDF, given as an unnamed list in parameter order.
myargs <- list(100:104, 200:204)

result <- tiledbcloud::execute_generic_udf(
  udf = myfunc,
  args = myargs,
  namespace = namespace
)
print(result)
[1] 300 302 304 306 308

Single-array UDFs

# Summarise the "a" column of the data handed to the UDF.
# Returns a named list holding the minimum, median and maximum of "a".
myfunc <- function(df) {
  a_values <- as.vector(df[["a"]])
  list(
    min = min(a_values),
    med = median(a_values),
    max = max(a_values)
  )
}

# Run the UDF against a slice of the dense quickstart array, reading only
# attribute "a". R comments start with `#`; a `//` here would be a syntax
# error.
result <- tiledbcloud::execute_array_udf(
    array="tiledb://TileDB-Inc/quickstart_dense",
    udf=myfunc,
    selectedRanges=list(cbind(1,2), cbind(1,2)), # Upper-left 2x2
    attrs=c("a"),
    namespace="demo" # namespace to charge
)
print(result)
$min
[1] 1

$med
[1] 3.5

$max
[1] 6
# Regress bill length on body mass, with an intercept term.
# Returns the fitted coefficients: the intercept first, then the slope.
myfunc <- function(df) {
  response <- as.vector(df$bill_length_mm)
  # The name `vec2` is deliberate: cbind() uses it as the column label, and
  # that label becomes the name of the slope coefficient in the result.
  vec2 <- as.vector(df$body_mass_g)
  design <- cbind(1, vec2)
  fit <- lm.fit(design, response)
  fit$coefficients
}

# Run the regression UDF on a slice of the penguins array, reading only the
# two attributes the UDF uses. `namespace` is left out on purpose: a NULL
# namespace means the username namespace is charged.
result <- tiledbcloud::execute_array_udf(
  array = "tiledb://demo/palmer_penguins2",
  udf = myfunc,
  selectedRanges = list(
    cbind("A", "Z"),
    cbind(2007, 2009)
  ),
  attrs = list("bill_length_mm", "body_mass_g")
)
print(result)
                   vec2
27.15072200  0.00400329

Multi-array UDFs

# Combine summary statistics of column "a" from two inputs.
# Each entry of the returned list is the statistic of the first input plus
# the same statistic of the second: length, minimum, median and maximum.
myfunc <- function(df1, df2) {
  first <- as.vector(df1[["a"]])
  second <- as.vector(df2[["a"]])

  # Apply one statistic to both vectors and add the two results.
  add_stat <- function(stat) stat(first) + stat(second)

  list(
    len = add_stat(length),
    min = add_stat(min),
    med = add_stat(median),
    max = add_stat(max)
  )
}

# Describe the slice of each input array the UDF should receive: the array
# URI, the ranges to read in row-major layout, and the buffers to fetch
# ("a", the column the UDF reads).
# Every class carries the tiledbcloud:: prefix so this snippet also runs
# when the package has not been attached with library().
details1 <- tiledbcloud::UDFArrayDetails$new(
  uri="tiledb://TileDB-Inc/quickstart_dense",
  ranges=tiledbcloud::QueryRanges$new(
    layout=tiledbcloud::Layout$new('row-major'),
    ranges=list(cbind(1,4),cbind(1,4))
  ),
  buffers=list("a")
)

details2 <- tiledbcloud::UDFArrayDetails$new(
  uri="tiledb://TileDB-Inc/quickstart_sparse",
  ranges=tiledbcloud::QueryRanges$new(
    layout=tiledbcloud::Layout$new('row-major'),
    ranges=list(cbind(1,2),cbind(1,4))
  ),
  buffers=list("a")
)

# The UDF takes one argument per array, in the order given in array_list.
result <- tiledbcloud::execute_multi_array_udf(
  array_list=list(details1, details2),
  udf=myfunc,
  namespace="demo"
)
print(result)
$len
[1] 19

$min
[1] 2

$med
[1] 10.5

$max
[1] 19

UDF registration

Known issue: Tags can be set successfully using the functions shown below, but they are not returned by get_udf_info. This is a server-side bug that affects UDF-info readback regardless of the client (Python or R).

Register a UDF and read back UDF info

Generic UDFs

library(tiledbcloud)

# Namespace and name under which the UDF is registered.
namespace <- 'testuser'
udfname <- 'udf-registration-test-generic'

# The function to register: raises `vec` to the power `exponent`
# elementwise and returns the sum.
myfunc <- function(vec, exponent) {
  sum(vec ** exponent)
}

# The UDF type is passed by name, matching the single_array and multi_array
# registration examples, rather than as a positional argument placed
# between named ones.
register_udf(namespace=namespace, name=udfname, type='generic', func=myfunc)

# Read the registration back and show its structure.
info <- get_udf_info(namespace=namespace, name=udfname)
str(info)
$ ./cloud-register-generic-udf.r
List of 5
 $ name    : chr "udf-registration-test-generic"
 $ exec    : chr "Wzg4LDEwLDAsMCwwLDMsMCw0LDEsMSwwLDMsNSwwLDAsMCwwLDUsODUsODQsNzAsNDUsNTYsMCwwLDQsMywwLDAsMCwyNTMsMCwwLDQsMiwwLDA"| __truncated__
 $ exec_raw: chr "function (vec, exponent) \n{\n    sum(vec ** exponent)\n}"
 $ language: chr "\"r\""
 $ tags    : NULL

Single-array UDFs

library(tiledbcloud)

# Namespace and name under which the UDF is registered.
namespace <- "testuser"
udfname <- "udf-registration-test-single-array-with-arg"

# The function to register. Besides the array data `df`, it takes the name
# of the column to read and the exponent to apply. The body is left exactly
# as written because the exec_raw field of the readback echoes it.
myfunc <- function(df, attrname, exponent) {
  vec <- as.vector(df[[attrname]])
  sum(vec ** exponent)
}

tiledbcloud::register_udf(
  namespace = namespace,
  name = udfname,
  type = "single_array",
  func = myfunc
)

# Read the registration back and show its structure.
info <- tiledbcloud::get_udf_info(
  namespace = namespace,
  name = udfname
)
str(info)
List of 5
 $ name    : chr "udf-registration-test-single-array-with-arg"
 $ exec    : chr "Wzg4LDEwLDAsMCwwLDMsMCw0LDEsMSwwLDMsNSwwLDAsMCwwLDUsODUsODQsNzAsNDUsNTYsMCwwLDQsMywwLDAsMCwyNTMsMCwwLDQsMiwwLDA"| __truncated__
 $ exec_raw: chr "function (df, attrname, exponent) \n{\n    vec <- as.vector(df[[attrname]])\n    sum(vec ** exponent)\n}"
 $ language: chr "\"r\""
 $ tags    : list()

Multi-array UDFs

library(tiledbcloud)

# Namespace and name under which the UDF is registered.
namespace <- "testuser"
udfname <- "udf-registration-test-multi-array-with-arg"

# The function to register. It takes two inputs plus the name of the column
# to read from each, and returns the pairwise sums of their length, minimum,
# median and maximum. The body is left exactly as written because the
# exec_raw field of the readback echoes it.
myfunc <- function(df1, df2, attrname) {
  vec1 <- as.vector(df1[[attrname]])
  vec2 <- as.vector(df2[[attrname]])
  list(
    len=length(vec1) + length(vec2),
    min=min(vec1) + min(vec2),
    med=median(vec1) + median(vec2),
    max=max(vec1) + max(vec2)
  )
}

tiledbcloud::register_udf(
  namespace = namespace,
  name = udfname,
  type = "multi_array",
  func = myfunc
)

# Read the registration back and show its structure.
info <- tiledbcloud::get_udf_info(
  namespace = namespace,
  name = udfname
)
str(info)
[1] "GET INFO"
List of 5
 $ name    : chr "udf-registration-test-multi-array-with-arg"
 $ exec    : chr "Wzg4LDEwLDAsMCwwLDMsMCw0LDEsMSwwLDMsNSwwLDAsMCwwLDUsODUsODQsNzAsNDUsNTYsMCwwLDQsMywwLDAsMCwyNTMsMCwwLDQsMiwwLDA"| __truncated__
 $ exec_raw: chr "function (df1, df2, attrname) \n{\n    vec1 <- as.vector(df1[[attrname]])\n    vec2 <- as.vector(df2[[attrname]"| __truncated__
 $ language: chr "\"r\""
 $ tags    : list()

Invoke registered UDF

Generic UDFs

library(tiledbcloud)

namespace <- 'testuser'
udfname <- 'udf-registration-test-generic'
# A registered UDF is addressed as "<namespace>/<name>".
registered_udf_name <- paste(namespace, udfname, sep='/')

# Arguments are passed by name and must match the parameters of the
# registered function (vec, exponent).
result <- execute_generic_udf(
  registered_udf_name=registered_udf_name,
  args=list(vec=1:10, exponent=3),
  namespace=namespace
)
print(result)
$ ./cloud-execute-registered-generic-udf.r
[1] 3025

Single-array UDFs

library(tiledbcloud)

# The namespace charged for the execution. In this example it is also the
# namespace the UDF was registered under, so it prefixes the registered name.
namespace_to_charge <- 'testuser'
udf_name <- 'udf-registration-test-single-array-with-arg'
# A registered UDF is addressed as "<namespace>/<name>".
registered_udf_name <- paste(namespace_to_charge, udf_name, sep='/')

# `args` supplies the UDF's extra parameters (attrname, exponent); the array
# data itself is provided by the service from the selected ranges.
result <- tiledbcloud::execute_array_udf(
  array="TileDB-Inc/quickstart_dense",
  registered_udf_name=registered_udf_name,
  args=list(attrname="a", exponent=2),
  selectedRanges=list(cbind(1,2), cbind(1,2)),
  attrs=c("a"),
  namespace=namespace_to_charge
)
print(result)
[1] 66

Multi-array UDFs

#!/usr/bin/env Rscript

library(tiledbcloud)

# The namespace charged for the execution, and the registered UDF to run,
# addressed as "<namespace>/<name>".
namespace_to_charge <- "testuser"
registered_udf_name <- "testuser/udf-registration-test-multi-array-with-arg"

# Row-major slice of the dense quickstart array, fetching buffer "a".
details1 <- tiledbcloud::UDFArrayDetails$new(
  uri = "tiledb://TileDB-Inc/quickstart_dense",
  ranges = QueryRanges$new(
    layout = Layout$new("row-major"),
    ranges = list(cbind(1, 4), cbind(1, 4))
  ),
  buffers = list("a")
)

# Row-major slice of the sparse quickstart array, fetching buffer "a".
details2 <- tiledbcloud::UDFArrayDetails$new(
  uri = "tiledb://TileDB-Inc/quickstart_sparse",
  ranges = QueryRanges$new(
    layout = Layout$new("row-major"),
    ranges = list(cbind(1, 2), cbind(1, 4))
  ),
  buffers = list("a")
)

# `args` supplies the UDF's extra parameter (attrname) alongside the arrays.
result <- tiledbcloud::execute_multi_array_udf(
  array_list = list(details1, details2),
  registered_udf_name = registered_udf_name,
  args = list(attrname = "a"),
  namespace = namespace_to_charge
)
print(result)
$len
[1] 19

$min
[1] 2

$med
[1] 10.5

$max
[1] 19

Unregister UDF

Unregistering works the same way for generic, single_array, and multi_array registered UDFs.

library(tiledbcloud)

# Identify the registered UDF by its namespace and name, then remove it.
namespace <- "testuser"
udfname <- "udf-registration-test-generic"

deregister_udf(
  namespace = namespace,
  name = udfname
)