Using omxr

Greg Macfarlane

2020-09-17

# Fix the random seed so the randomly generated matrices are reproducible.
set.seed(10)
# Attach omxr, the package this document demonstrates.
library(omxr)

Create and write

.omx files are HDF5 containers storing many different matrices and their attributes. You can create a new container with the create_omx() function.

# Zone identifiers for a 10-zone system.
zones <- seq_len(10L)
# Path of a temporary file that will hold the OMX container.
omxfile <- tempfile(fileext = ".omx")

Let’s start with a 10-zone system with random trips and costs.

# Random trip counts: normal draws (mean 200, sd 50), one per zone pair.
trips <- matrix(
  rnorm(length(zones)^2, mean = 200, sd = 50),
  nrow = length(zones),
  ncol = length(zones)
)
# Random costs: log-normal draws (meanlog 1, sdlog 1), one per zone pair.
cost <- matrix(
  rlnorm(length(zones)^2, meanlog = 1, sdlog = 1),
  nrow = length(zones),
  ncol = length(zones)
)

We can store these in the omx container.

# Store the trips matrix in the container under the name "trips".
write_omx(
  file = omxfile,
  matrix = trips,
  name = "trips",
  description = "Total Trips"
)
## Warning in rhdf5::h5createDataset(H5File, matrix, ItemName, dim(matrix), : NAs
## introduced by coercion
# Store the cost matrix in the same container under the name "cost".
write_omx(
  file = omxfile,
  matrix = cost,
  name = "cost",
  description = "Generalized Cost"
)
## Warning in rhdf5::h5createDataset(H5File, matrix, ItemName, dim(matrix), : NAs
## introduced by coercion

Read

Now that the matrices are in the file, we can try reading them.

# Read the full "trips" matrix back from the container.
read_omx(file = omxfile, name = "trips")
##           [,1]     [,2]      [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
##  [1,] 200.9373 255.0890 170.18447 107.3130 254.3276 179.9681 138.1203 219.0461
##  [2,] 190.7874 237.7891  90.73566 196.1027 161.8728 183.2722 177.1912 128.4786
##  [3,] 131.4335 188.0883 166.25670 248.4283 158.5669 268.3977 158.4839 147.5777
##  [4,] 170.0416 249.3722  94.04694 209.2463 241.7237 306.8884 217.0058 189.0748
##  [5,] 214.7273 237.0695 136.74010 131.0028 151.6174 225.2910 253.3188 125.5032
##  [6,] 219.4897 204.4674 181.31692 128.2243 198.5592 239.3171 260.8063 258.6353
##  [7,] 139.5962 152.2528 165.62223 218.1044 211.6263 154.8894 236.7845 126.0086
##  [8,] 181.8162 190.2425 156.39206 112.0457 184.9396 226.6448 175.9396 178.4806
##  [9,] 118.6664 246.2761 194.91195 183.7728 166.1193 167.7053 228.1372 147.4181
## [10,] 187.1761 224.1489 187.31097 167.4219 232.7614 214.5494 137.6840 276.1293
##           [,9]    [,10]
##  [1,] 229.6414 179.1823
##  [2,] 188.8669 190.4259
##  [3,] 235.6447 203.4772
##  [4,] 235.8300 257.7674
##  [5,] 222.0121 229.7479
##  [6,] 207.9415 129.0177
##  [7,] 232.9882 119.6661
##  [8,] 311.0260 244.6463
##  [9,] 140.8027 207.4084
## [10,] 196.3022 261.3514
# Read the full "cost" matrix back from the container.
read_omx(file = omxfile, name = "cost")
##            [,1]      [,2]      [,3]       [,4]       [,5]      [,6]      [,7]
##  [1,] 1.2689575 0.7556669 1.6797322 11.3499437  0.5089873 1.8384003 5.2234923
##  [2,] 4.1345372 8.4053485 3.3296986  5.1214412  0.8144963 2.1172806 1.5692401
##  [3,] 0.9608439 1.7089266 2.6333591  0.3690528  0.3816496 8.6287940 4.5770493
##  [4,] 5.5376707 1.9822643 0.8223573  1.3746069 11.8313443 1.1448490 1.3506648
##  [5,] 1.4430905 6.8503048 5.0717262  1.7159117  3.9450923 1.1426175 1.7525830
##  [6,] 4.7739529 2.9362837 1.0889299  1.0170749  7.8922348 0.2668638 1.3808244
##  [7,] 5.2645027 7.6900217 3.4860107  4.4608160  4.6211790 4.9969622 7.0932327
##  [8,] 0.5178597 5.7080999 0.9392977  5.6171111  3.0101305 8.5849103 0.6261450
##  [9,] 7.6001499 9.5404901 1.8889436  5.2978376 10.3582413 0.8190602 3.2666464
## [10,] 8.3976644 7.0351497 0.8130238  7.0624106  2.9660609 0.5598979 0.6471694
##            [,8]       [,9]     [,10]
##  [1,] 0.8716216  4.1599918 3.0435645
##  [2,] 1.7956277  5.1732444 7.3546434
##  [3,] 3.1390942  0.6974628 1.3755431
##  [4,] 7.8618687  2.2288681 0.7580111
##  [5,] 1.5360376  5.0495679 0.6258167
##  [6,] 9.7491624 21.5033687 1.9868013
##  [7,] 3.4153819  2.0031386 0.4947714
##  [8,] 1.9960833  3.6011225 0.7043255
##  [9,] 7.0981141  5.4266247 0.9029450
## [10,] 4.7059251  2.8472723 0.9052510

You can transform a matrix to long format, which is more tidyverse-friendly.

library(tidyverse)
# Read the trips matrix, then reshape it to long format with a "trips" column.
omxfile %>%
  read_omx("trips") %>%
  gather_matrix("trips")
## # A tibble: 100 x 3
##    origin destination trips
##     <int>       <int> <dbl>
##  1      1           1  201.
##  2      2           1  191.
##  3      3           1  131.
##  4      4           1  170.
##  5      5           1  215.
##  6      6           1  219.
##  7      7           1  140.
##  8      8           1  182.
##  9      9           1  119.
## 10     10           1  187.
## # … with 90 more rows

You can also read subsets of matrices.

# Read only rows 2-4 and columns 2-5 of the "trips" matrix.
read_omx(
  file = omxfile,
  name = "trips",
  row_index = 2:4,
  col_index = 2:5
)
##          [,1]      [,2]     [,3]     [,4]
## [1,] 237.7891  90.73566 196.1027 161.8728
## [2,] 188.0883 166.25670 248.4283 158.5669
## [3,] 249.3722  94.04694 209.2463 241.7237

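There are also functions to get the attributes of an OMX file. A call to get_omx_attr() returns the file-level attributes (the matrix shape and the OMX version), and a call to list_omx() will show the matrices and lookups stored inside the file.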

# Show the file-level attributes of the container (SHAPE and VERSION).
get_omx_attr(omxfile)
## $SHAPE
## [1] 10 10
## 
## $VERSION
## [1] "0.2"
# Summarise the container: version, dimensions, stored matrices, and lookups.
list_omx(omxfile)
## $OMXVersion
## [1] "0.2"
## 
## $Rows
## [1] 10
## 
## $Columns
## [1] 10
## 
## $Matrices
##    name dclass     dim   type navalue      description
## 1  cost  FLOAT 10 x 10 matrix      -1 Generalized Cost
## 2 trips  FLOAT 10 x 10 matrix      -1      Total Trips
## 
## $Lookups
## [1] name   dclass dim   
## <0 rows> (or 0-length row.names)

Lookups

Perhaps a better way to read a subset of a matrix is with a defined “lookup.” These can be stored in the omx container in addition to the matrices. You specify the lookup with a logical vector of the appropriate length, where TRUE means the row or column should be included, and FALSE means it should be excluded.

# Flag zones 1 through 5 and zone 9 for inclusion in the lookup.
lookup <- zones %in% c(1:5, 9)
lookup
##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE
# Save the logical vector in the container as a lookup named "trial".
write_lookup(
  omxfile,
  lookup_v = lookup,
  name = "trial",
  description = "test lookup",
  replace = TRUE
)

Now we can get the selected rows and columns from either matrix automatically.

# Subset both the rows and the columns of "trips" using the "trial" lookup.
read_selected_omx(
  omxfile, "trips",
  row_selection = "trial",
  col_selection = "trial"
)
##          [,1]     [,2]      [,3]     [,4]     [,5]     [,6]
## [1,] 200.9373 255.0890 170.18447 107.3130 254.3276 229.6414
## [2,] 190.7874 237.7891  90.73566 196.1027 161.8728 188.8669
## [3,] 131.4335 188.0883 166.25670 248.4283 158.5669 235.6447
## [4,] 170.0416 249.3722  94.04694 209.2463 241.7237 235.8300
## [5,] 214.7273 237.0695 136.74010 131.0028 151.6174 222.0121
## [6,] 118.6664 246.2761 194.91195 183.7728 166.1193 140.8027