Overview
TileDB supports a wide range of date and time types, as shown in the documentation for Datetimes.
The supported range of dates and times corresponds to the range supported by the Python package NumPy, and is described in its documentation.
import numpy as np
import datetime

# A reference timestamp, a little over one year past the Unix epoch.
day = "1971-02-03 04:05:06"   # third day of second month of first year past epoch

# The same instant expressed as an integer count since the epoch, at
# progressively finer resolutions (year, month, day, hour, minute, second).
[np.datetime64(day, 'Y').astype('int64'),   # extract year, month, ... as an int64
 np.datetime64(day, 'M').astype('int64'),
 np.datetime64(day, 'D').astype('int64'),
 np.datetime64(day, 'h').astype('int64'),
 np.datetime64(day, 'm').astype('int64'),
 np.datetime64(day, 's').astype('int64')]
## [1, 13, 398, 9556, 573365, 34401906]
R has date and datetime support built-in. The `Date` type supports dates using a count of days since the epoch, and matches the `D` value from the previous example. Similarly, the 'compact' `POSIXct` representation of a datetime uses the number of seconds since the epoch and corresponds to the `s` value from the previous example.
# Both conversions count from the Unix epoch.
epoch <- "1970-01-01"

# Days since the epoch -> Date (numeric input requires 'origin').
as.Date(398, origin = epoch)
## [1] "1971-02-03"

# Seconds since the epoch -> POSIXct, shown in UTC.
as.POSIXct(34401906, origin = epoch, tz = "UTC", usetz = TRUE)
## [1] "1971-02-03 04:05:06 UTC"
R can reconstruct dates and times from the numpy representation using the epoch as a 'base' date along with time period calculations. This can be done using base R (adding to `Date` or `POSIXct` objects), by using the lubridate package for a number of intermediate formats, and by using the nanotime package for higher-resolution periods and intervals.
suppressMessages(library(lubridate))

# Epoch as a date, shifted by the year / month / day counts from the numpy example.
epoch_date <- ymd("1970-01-01")
epoch_date + c(years(1), months(13), days(398))
## [1] "1971-01-01" "1971-02-01" "1971-02-03"

# Epoch as a datetime, shifted by the hour / minute counts.
epoch_datetime <- ymd_hms("1970-01-01 00:00:00")
epoch_datetime + c(hours(9556), minutes(573365))
## [1] "1971-02-03 04:00:00 UTC" "1971-02-03 04:05:00 UTC"

suppressMessages(library(nanotime))

# Nanosecond resolution: epoch plus a duration that includes nanoseconds.
epoch_nano <- nanotime("1970-01-01T00:00:00+00:00")
offset <- nanoduration(hours = 2, minutes = 3, seconds = 4, nanoseconds = 5)
epoch_nano + offset
## [1] 1970-01-01T02:03:04.000000005+00:00
Python and R Interoperability
Coarsest: Year
Python
import numpy as np
import sys
import os
import tiledb

# Create and populate a sparse array whose dimension is a datetime at
# year resolution.
uri = "/tmp/tiledb/dt_year"

# Single dimension "rows": datetime64[Y] with a tile extent of 10 years.
dom = tiledb.Domain(tiledb.Dim(name="rows",
                               domain=(np.datetime64('2001-01-01'), np.datetime64('2030-12-31')),
                               tile=np.timedelta64(10, 'Y'),
                               dtype=np.datetime64('', 'Y')))
# Sparse schema with one int32 attribute "a".
schema = tiledb.ArraySchema(domain=dom,
                            sparse=True,
                            attrs=[tiledb.Attr(name="a", dtype=np.int32)])
# Remove any existing directory at the URI before creating the array.
if (os.path.isdir(uri)):
    tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)

# Write three cells; I holds the datetime coordinates, data the attribute values.
with tiledb.SparseArray(uri, mode='w') as A:
    I = [np.datetime64('2001-01-01'),np.datetime64('2002-01-01'),np.datetime64('2003-01-01')]
    data = np.array(([1,2,3]))
    A[I] = data
R
library(tiledb)

# Read back the year-resolution array written by the Python example above.
uri <- "/tmp/tiledb/dt_year"
arr <- tiledb_array(uri, as.data.frame = TRUE)
arr[]
##         rows a
## 1 2001-01-01 1
## 2 2002-01-01 2
## 3 2003-01-01 3

## we can also look at 'raw' int64 values:
datetimes_as_int64(arr) <- TRUE
arr[]
##   rows a
## 1   31 1
## 2   32 2
## 3   33 3
Day
Python
import numpy as np
import sys
import os
import tiledb

# Create and populate a sparse array whose dimension is a datetime at
# day resolution.
uri = "/tmp/tiledb/dt_day"

# Single dimension "rows": datetime64[D] with a tile extent of 10 days.
dom = tiledb.Domain(tiledb.Dim(name="rows",
                               domain=(np.datetime64('2001-01-01'), np.datetime64('2030-12-31')),
                               tile=np.timedelta64(10, 'D'),
                               dtype=np.datetime64('', 'D')))
# Sparse schema with one int32 attribute "a".
schema = tiledb.ArraySchema(domain=dom,
                            sparse=True,
                            attrs=[tiledb.Attr(name="a", dtype=np.int32)])
# Remove any existing directory at the URI before creating the array.
if (os.path.isdir(uri)):
    tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)

# Write three cells; I holds the datetime coordinates, data the attribute values.
with tiledb.SparseArray(uri, mode='w') as A:
    I = [np.datetime64('2001-01-01'),np.datetime64('2001-01-02'),np.datetime64('2001-01-03')]
    data = np.array(([1,2,3]))
    A[I] = data
R
library(tiledb)

# Read back the day-resolution array written by the Python example above.
uri <- "/tmp/tiledb/dt_day"
arr <- tiledb_array(uri, as.data.frame = TRUE)
arr[]
##         rows a
## 1 2001-01-01 1
## 2 2001-01-02 2
## 3 2001-01-03 3
Minute
Python
import numpy as np
import sys
import os
import tiledb

# Create and populate a sparse array whose dimension is a datetime at
# minute resolution.
uri = "/tmp/tiledb/dt_min"

# Single dimension "rows": datetime64[m] with a tile extent of 10 minutes.
dom = tiledb.Domain(tiledb.Dim(name="rows",
                               domain=(np.datetime64('2001-01-01'), np.datetime64('2030-12-31')),
                               tile=np.timedelta64(10, 'm'),
                               dtype=np.datetime64('', 'm')))
# Sparse schema with one int32 attribute "a".
schema = tiledb.ArraySchema(domain=dom,
                            sparse=True,
                            attrs=[tiledb.Attr(name="a", dtype=np.int32)])
# Remove any existing directory at the URI before creating the array.
if (os.path.isdir(uri)):
    tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)

# Write three cells; I holds the datetime coordinates, data the attribute values.
with tiledb.SparseArray(uri, mode='w') as A:
    I = [np.datetime64('2001-01-01 00:00'),
         np.datetime64('2001-01-02 00:01'),
         np.datetime64('2001-01-03 00:02')]
    data = np.array(([1,2,3]))
    A[I] = data
R
library(tiledb)

# Read back the minute-resolution array written by the Python example above.
uri <- "/tmp/tiledb/dt_min"
arr <- tiledb_array(uri, as.data.frame = TRUE)
arr[]
##                  rows a
## 1 2001-01-01 00:00:00 1
## 2 2001-01-02 00:01:00 2
## 3 2001-01-03 00:02:00 3
Millisecond
Python
import numpy as np
import sys
import os
import tiledb

# Create and populate a sparse array whose dimension is a datetime at
# millisecond resolution.
uri = "/tmp/tiledb/dt_ms"

# Single dimension "rows": datetime64[ms] with a tile extent of 10 milliseconds.
dom = tiledb.Domain(tiledb.Dim(name="rows",
                               domain=(np.datetime64('1969-01-01'), np.datetime64('2030-12-31')),
                               tile=np.timedelta64(10, 'ms'),
                               dtype=np.datetime64('', 'ms')))
# Sparse schema with one int32 attribute "a".
schema = tiledb.ArraySchema(domain=dom,
                            sparse=True,
                            attrs=[tiledb.Attr(name="a", dtype=np.int32)])
# Remove any existing directory at the URI before creating the array.
if (os.path.isdir(uri)):
    tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)

# Write six cells; I holds the datetime coordinates, data the attribute values.
with tiledb.SparseArray(uri, mode='w') as A:
    I = [np.datetime64('1970-01-01 00:00:00.001'),
         np.datetime64('1980-01-01 00:00:00.002'),
         np.datetime64('1990-01-01 00:00:00.003'),
         np.datetime64('2000-01-01 00:00:00.004'),
         np.datetime64('2010-01-01 00:00:00.005'),
         np.datetime64('2020-01-01 00:00:00.006')]
    data = np.array(([1,2,3,4,5,6]))
    A[I] = data
R
library(tiledb)

# Read back the millisecond-resolution array written by the Python example above.
uri <- "/tmp/tiledb/dt_ms"
arr <- tiledb_array(uri, as.data.frame = TRUE)
arr[]
##                      rows a
## 1 1970-01-01 00:00:00.001 1
## 2 1980-01-01 00:00:00.002 2
## 3 1990-01-01 00:00:00.003 3
## 4 2000-01-01 00:00:00.004 4
## 5 2010-01-01 00:00:00.005 5
## 6 2020-01-01 00:00:00.006 6
Microsecond
Python
import numpy as np
import sys
import os
import tiledb

# Create and populate a sparse array whose dimension is a datetime at
# microsecond resolution.
uri = "/tmp/tiledb/dt_us"

# Single dimension "rows": datetime64[us] with a tile extent of 10 microseconds.
dom = tiledb.Domain(tiledb.Dim(name="rows",
                               domain=(np.datetime64('1969-01-01'), np.datetime64('2030-12-31')),
                               tile=np.timedelta64(10, 'us'),
                               dtype=np.datetime64('', 'us')))
# Sparse schema with one int32 attribute "a".
schema = tiledb.ArraySchema(domain=dom,
                            sparse=True,
                            attrs=[tiledb.Attr(name="a", dtype=np.int32)])
# Remove any existing directory at the URI before creating the array.
if (os.path.isdir(uri)):
    tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)

# Write six cells; I holds the datetime coordinates, data the attribute values.
with tiledb.SparseArray(uri, mode='w') as A:
    I = [np.datetime64('1970-01-01 00:00:00.000001'),
         np.datetime64('1980-01-01 00:00:00.000002'),
         np.datetime64('1990-01-01 00:00:00.000003'),
         np.datetime64('2000-01-01 00:00:00.000004'),
         np.datetime64('2010-01-01 00:00:00.000005'),
         np.datetime64('2020-01-01 00:00:00.000006')]
    data = np.array(([1,2,3,4,5,6]))
    A[I] = data
R
library(tiledb)

# Read back the microsecond-resolution array written by the Python example above.
uri <- "/tmp/tiledb/dt_us"
arr <- tiledb_array(uri, as.data.frame = TRUE)
arr[]
##                         rows a
## 1 1970-01-01 00:00:00.000001 1
## 2 1980-01-01 00:00:00.000001 2
## 3 1990-01-01 00:00:00.000002 3
## 4 2000-01-01 00:00:00.000003 4
## 5 2010-01-01 00:00:00.000005 5
## 6 2020-01-01 00:00:00.000005 6
# NOTE(review): several printed fractional seconds differ from the values
# written by the Python example (e.g. row 2 shows .000001 for a written
# .000002). This looks like a display artifact of printing datetimes at
# microsecond precision rather than a storage problem -- confirm.
Nanosecond
Python
import numpy as np
import sys
import os
import tiledb

# Create and populate a sparse array whose dimension is a datetime at
# nanosecond resolution.
uri = "/tmp/tiledb/dt_ns"

# Single dimension "rows": datetime64[ns] with a tile extent of 10 nanoseconds.
dom = tiledb.Domain(tiledb.Dim(name="rows",
                               domain=(np.datetime64('1969-01-01'), np.datetime64('2030-12-31')),
                               tile=np.timedelta64(10, 'ns'),
                               dtype=np.datetime64('', 'ns')))
# Sparse schema with one int32 attribute "a".
schema = tiledb.ArraySchema(domain=dom,
                            sparse=True,
                            attrs=[tiledb.Attr(name="a", dtype=np.int32)])
# Remove any existing directory at the URI before creating the array.
if (os.path.isdir(uri)):
    tiledb.VFS().remove_dir(uri)
tiledb.SparseArray.create(uri, schema)

# Write six cells; I holds the datetime coordinates, data the attribute values.
with tiledb.SparseArray(uri, mode='w') as A:
    I = [np.datetime64('1970-01-01 00:00:00.000000001'),
         np.datetime64('1980-01-01 00:00:00.000000002'),
         np.datetime64('1990-01-01 00:00:00.000000003'),
         np.datetime64('2000-01-01 00:00:00.000000004'),
         np.datetime64('2010-01-01 00:00:00.000000005'),
         np.datetime64('2020-01-01 00:00:00.000000006')]
    data = np.array(([1,2,3,4,5,6]))
    A[I] = data
R
library(tiledb)

# Read back the nanosecond-resolution array written by the Python example above.
uri <- "/tmp/tiledb/dt_ns"
arr <- tiledb_array(uri, as.data.frame = TRUE)
arr[]
##                                  rows a
## 1 1970-01-01T00:00:00.000000001+00:00 1
## 2 1980-01-01T00:00:00.000000002+00:00 2
## 3 1990-01-01T00:00:00.000000003+00:00 3
## 4 2000-01-01T00:00:00.000000004+00:00 4
## 5 2010-01-01T00:00:00.000000005+00:00 5
## 6 2020-01-01T00:00:00.000000006+00:00 6
Use integer64 Directly
Sometimes we may want to access the date or datetime values in their native `integer64` format. To do so, we set a toggle when opening the array, as shown in the following example, which uses the array from the preceding example (at nanosecond resolution).
library(tiledb)

# Open the nanosecond array with datetimes returned as raw integer64 values.
uri <- "/tmp/tiledb/dt_ns"
arr <- tiledb_array(uri, as.data.frame = TRUE, datetimes_as_int64 = TRUE)
arr[]
##                  rows a
## 1                   1 1
## 2  315532800000000002 2
## 3  631152000000000003 3
## 4  946684800000000004 4
## 5 1262304000000000005 5
## 6 1577836800000000006 6
We can also write `integer64` types. The following example adds two extra rows:
library(tiledb)

# Open the nanosecond array in integer64 mode, write two rows whose
# coordinates are given directly as integer64, then read everything back.
uri <- "/tmp/tiledb/dt_ns"
arr <- tiledb_array(uri, as.data.frame = TRUE, datetimes_as_int64 = TRUE)
arr[] <- data.frame(rows = bit64::as.integer64(2:3), a = 102:103)
arr[]
##                  rows   a
## 1                   1   1
## 2                   2 102
## 3                   3 103
## 4  315532800000000002   2
## 5  631152000000000003   3
## 6  946684800000000004   4
## 7 1262304000000000005   5
## 8 1577836800000000006   6