format functions

The format set of functions can be combined to format a summarised_result object (see R package omopgenerics) into a nice dataframe, flextable or gt table to display. In what follows, we show the pipeline for formatting summarised_result objects using these functions.

First, we load the relevant libraries and generate a mock summarised_result.

library(visOmopResults)
library(dplyr)
mock_sr <- mockSummarisedResult()
mock_sr |> glimpse()
#> Rows: 126
#> Columns: 16
#> $ result_id        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ cdm_name         <chr> "mock", "mock", "mock", "mock", "mock", "mock", "mock…
#> $ result_type      <chr> "mock_summarised_result", "mock_summarised_result", "…
#> $ package_name     <chr> "visOmopResults", "visOmopResults", "visOmopResults",…
#> $ package_version  <chr> "0.1.1", "0.1.1", "0.1.1", "0.1.1", "0.1.1", "0.1.1",…
#> $ group_name       <chr> "cohort_name", "cohort_name", "cohort_name", "cohort_…
#> $ group_level      <chr> "cohort1", "cohort1", "cohort1", "cohort1", "cohort1"…
#> $ strata_name      <chr> "overall", "age_group &&& sex", "age_group &&& sex", …
#> $ strata_level     <chr> "overall", "<40 &&& Male", ">=40 &&& Male", "<40 &&& …
#> $ variable_name    <chr> "number subjects", "number subjects", "number subject…
#> $ variable_level   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ estimate_name    <chr> "count", "count", "count", "count", "count", "count",…
#> $ estimate_type    <chr> "integer", "integer", "integer", "integer", "integer"…
#> $ estimate_value   <chr> "1535921", "8884563", "5266621", "660313", "6650851",…
#> $ additional_name  <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ additional_level <chr> "overall", "overall", "overall", "overall", "overall"…

1. formatEstimateValue()

This function provides tools to format the estimate_value column. It allows changing the number of decimals to display by estimate_type or estimate_name (decimals), and changing the decimal and thousand/million separator marks (decimalMark and bigMark respectively). By default, decimals of integer values are set to 0, numeric to 2, percentage to 1, and proportion to 3. The default decimal mark is “.” while the thousand/million separator is “,”.

mock_sr <- mock_sr |> formatEstimateValue()
mock_sr |> glimpse()
#> Rows: 126
#> Columns: 16
#> $ result_id        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ cdm_name         <chr> "mock", "mock", "mock", "mock", "mock", "mock", "mock…
#> $ result_type      <chr> "mock_summarised_result", "mock_summarised_result", "…
#> $ package_name     <chr> "visOmopResults", "visOmopResults", "visOmopResults",…
#> $ package_version  <chr> "0.1.1", "0.1.1", "0.1.1", "0.1.1", "0.1.1", "0.1.1",…
#> $ group_name       <chr> "cohort_name", "cohort_name", "cohort_name", "cohort_…
#> $ group_level      <chr> "cohort1", "cohort1", "cohort1", "cohort1", "cohort1"…
#> $ strata_name      <chr> "overall", "age_group &&& sex", "age_group &&& sex", …
#> $ strata_level     <chr> "overall", "<40 &&& Male", ">=40 &&& Male", "<40 &&& …
#> $ variable_name    <chr> "number subjects", "number subjects", "number subject…
#> $ variable_level   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ estimate_name    <chr> "count", "count", "count", "count", "count", "count",…
#> $ estimate_type    <chr> "integer", "integer", "integer", "integer", "integer"…
#> $ estimate_value   <chr> "1,535,921", "8,884,563", "5,266,621", "660,313", "6,…
#> $ additional_name  <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ additional_level <chr> "overall", "overall", "overall", "overall", "overall"…

2. formatEstimateName()

This function helps to manipulate the estimate_name and estimate_value columns. For instance, if we want all the variables for which we have counts and percentages to be displayed in a single row showing both as “N (%)”, we can do it with this function.

The estimateNameFormat argument is where all combinations or renamings of estimates can be specified. Values from the estimate_name column should be specified between <…>. The new estimate_name values to use will be the names of the vector, or the value itself when it is not named.

mock_sr <- mock_sr |> 
  formatEstimateName(
    estimateNameFormat = c(
      "N (%)" = "<count> (<percentage>%)", 
      "N" = "<count>",
      "Mean (SD)" = "<mean> (<sd>)"
    ),
    keepNotFormatted = FALSE,
    useFormatOrder = FALSE
  )
mock_sr |> glimpse()
#> Rows: 72
#> Columns: 16
#> $ result_id        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ cdm_name         <chr> "mock", "mock", "mock", "mock", "mock", "mock", "mock…
#> $ result_type      <chr> "mock_summarised_result", "mock_summarised_result", "…
#> $ package_name     <chr> "visOmopResults", "visOmopResults", "visOmopResults",…
#> $ package_version  <chr> "0.1.1", "0.1.1", "0.1.1", "0.1.1", "0.1.1", "0.1.1",…
#> $ group_name       <chr> "cohort_name", "cohort_name", "cohort_name", "cohort_…
#> $ group_level      <chr> "cohort1", "cohort1", "cohort1", "cohort1", "cohort1"…
#> $ strata_name      <chr> "overall", "age_group &&& sex", "age_group &&& sex", …
#> $ strata_level     <chr> "overall", "<40 &&& Male", ">=40 &&& Male", "<40 &&& …
#> $ variable_name    <chr> "number subjects", "number subjects", "number subject…
#> $ variable_level   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ estimate_name    <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N"…
#> $ estimate_type    <chr> "character", "character", "character", "character", "…
#> $ estimate_value   <chr> "1,535,921", "8,884,563", "5,266,621", "660,313", "6,…
#> $ additional_name  <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ additional_level <chr> "overall", "overall", "overall", "overall", "overall"…

Additional input arguments are keepNotFormatted, to specify whether rows that were not formatted should be returned or dropped, and useFormatOrder, to define whether rows should be sorted as in estimateNameFormat or the original order should be kept. In the latter scenario, when more than one estimate is pulled together, the new estimate position will be the first of the estimates being merged.

3. formatHeader()

This function helps to create a nice header for a flextable, gt table and other table formatting packages.

To this aim, this function pivots the columns specified in header, “widening” the table. The names of the new columns can be formatted with the arguments header, delim, includeHeaderName, and includeHeaderKey to later obtain a nice header for the formatted table (flextable or gt table).

Header keys

There are 3 different types of headers, identified with the keys “header”, “header_name”, and “header_level”.

For instance, we might want to pivot by “group_level” and have an upper header called “Names of the cohorts”. To do that we would proceed as follows:

mock_sr |>
  formatHeader(
    header = c("Names of the cohorts", "group_level"),
    delim = "\n",
    includeHeaderName = TRUE,
    includeHeaderKey = TRUE
  ) |>
  glimpse()
#> Rows: 36
#> Columns: 16
#> $ result_id                                                                       <int> …
#> $ cdm_name                                                                        <chr> …
#> $ result_type                                                                     <chr> …
#> $ package_name                                                                    <chr> …
#> $ package_version                                                                 <chr> …
#> $ group_name                                                                      <chr> …
#> $ strata_name                                                                     <chr> …
#> $ strata_level                                                                    <chr> …
#> $ variable_name                                                                   <chr> …
#> $ variable_level                                                                  <chr> …
#> $ estimate_name                                                                   <chr> …
#> $ estimate_type                                                                   <chr> …
#> $ additional_name                                                                 <chr> …
#> $ additional_level                                                                <chr> …
#> $ `[header]Names of the cohorts\n[header_name]group_level\n[header_level]cohort1` <chr> …
#> $ `[header]Names of the cohorts\n[header_name]group_level\n[header_level]cohort2` <chr> …

The labels indicating which type of header each part of the new column names refers to can be removed with the includeHeaderKey argument. However, keeping these keys in our header will allow us to style the different header types separately in the next step (fxTable and gtTable).

Continuing with our example, we want to pivot by strata (name and level), but we do not want the column names to appear in the header:

mock_sr <- mock_sr |>
  mutate(across(c("strata_name", "strata_level"), ~ gsub("&&&", "and", .x))) |>
  formatHeader(
    header = c("Stratifications", "strata_name", "strata_level"),
    delim = "\n",
    includeHeaderName = FALSE,
    includeHeaderKey = TRUE
  ) 

mock_sr |> glimpse()
#> Rows: 8
#> Columns: 22
#> $ result_id                                                                                 <int> …
#> $ cdm_name                                                                                  <chr> …
#> $ result_type                                                                               <chr> …
#> $ package_name                                                                              <chr> …
#> $ package_version                                                                           <chr> …
#> $ group_name                                                                                <chr> …
#> $ group_level                                                                               <chr> …
#> $ variable_name                                                                             <chr> …
#> $ variable_level                                                                            <chr> …
#> $ estimate_name                                                                             <chr> …
#> $ estimate_type                                                                             <chr> …
#> $ additional_name                                                                           <chr> …
#> $ additional_level                                                                          <chr> …
#> $ `[header]Stratifications\n[header_level]overall\n[header_level]overall`                   <chr> …
#> $ `[header]Stratifications\n[header_level]age_group and sex\n[header_level]<40 and Male`    <chr> …
#> $ `[header]Stratifications\n[header_level]age_group and sex\n[header_level]>=40 and Male`   <chr> …
#> $ `[header]Stratifications\n[header_level]age_group and sex\n[header_level]<40 and Female`  <chr> …
#> $ `[header]Stratifications\n[header_level]age_group and sex\n[header_level]>=40 and Female` <chr> …
#> $ `[header]Stratifications\n[header_level]sex\n[header_level]Male`                          <chr> …
#> $ `[header]Stratifications\n[header_level]sex\n[header_level]Female`                        <chr> …
#> $ `[header]Stratifications\n[header_level]age_group\n[header_level]<40`                     <chr> …
#> $ `[header]Stratifications\n[header_level]age_group\n[header_level]>=40`                    <chr> …

Notice how we replace the keyword “&&&” with “and” to get a nicer, more readable header.

4. gtTable() and fxTable()

Finally, we have the functions gtTable and fxTable which will transform our tibble into a gt or flextable object respectively. These functions provide several tools to personalise the formatted table.

gtTable()

Let’s start by manipulating the dataframe to have the columns that we want to display, and then use gtTable with default values:

# first we select the columns we want:
mock_sr <- mock_sr |>
  splitGroup() |>
  select(!all_of(c("cdm_name", "result_type", "package_name", 
                                 "package_version", "estimate_type", "result_id",
                                 "additional_name", "additional_level"))) 
mock_sr |>  gtTable()
Stratifications
cohort_name variable_name variable_level estimate_name overall age_group and sex sex age_group
overall <40 and Male >=40 and Male <40 and Female >=40 and Female Male Female <40 >=40
cohort1 number subjects - N 1,535,921 8,884,563 5,266,621 660,313 6,650,851 8,435,465 5,705,803 3,936,663 639,236
cohort2 number subjects - N 4,871,502 7,496,700 1,978,308 437,373 8,406,322 8,692,055 6,282,721 4,482,585 9,374,762
cohort1 age - Mean (SD) 29.49 (7.65) 77.63 (0.22) 70.39 (7.61) 91.31 (4.72) 41.94 (9.66) 35.89 (5.24) 77.30 (8.90) 75.70 (7.95) 3.40 (5.72)
cohort2 age - Mean (SD) 37.60 (7.24) 76.95 (7.07) 44.98 (6.54) 54.89 (6.69) 47.18 (0.73) 68.88 (2.86) 84.14 (5.69) 83.07 (4.03) 52.18 (0.95)
cohort1 Medications Amoxiciline N (%) 24,278 (48.4%) 68,965 (55.0%) 18,107 (23.9%) 46,174 (75.1%) 85,762 (23.4%) 50,265 (20.2%) 91,435 (41.3%) 37,294 (99.5%) 40,956 (40.8%)
cohort2 Medications Amoxiciline N (%) 89,597 (77.1%) 27,128 (45.8%) 52,948 (12.2%) 17,552 (96.3%) 29,375 (94.8%) 91,377 (93.4%) 91,953 (92.6%) 30,360 (65.8%) 53,567 (16.1%)
cohort1 Medications Ibuprofen N (%) 2,951 (1.8%) 91,435 (19.4%) 82,945 (45.1%) 27,260 (87.4%) 38,841 (59.2%) 97,900 (62.4%) 69,992 (64.1%) 4,486 (73.2%) 17,312 (66.8%)
cohort2 Medications Ibuprofen N (%) 24,422 (68.1%) 94,818 (7.9%) 32,819 (28.1%) 2,318 (18.4%) 41,099 (89.7%) 89,002 (93.8%) 51,938 (90.8%) 53,369 (94.2%) 35,151 (28.0%)

Now, we want to group results by “cohort_name”. More specifically, we want to have a row with the name of each cohort before the results of that cohort, and we want cohort1 to come before cohort2. Additionally, we want to merge those rows that contain the same information for all the columns. To get this table we will use gtTable as follows:

mock_sr |>  
  gtTable(
    groupNameCol = "cohort_name",
    groupNameAsColumn = FALSE,
    groupOrder = c("cohort1", "cohort2"),
    colsToMergeRows = "all_columns"
  )
Stratifications
variable_name variable_level estimate_name overall age_group and sex sex age_group
overall <40 and Male >=40 and Male <40 and Female >=40 and Female Male Female <40 >=40
cohort1
number subjects - N 1,535,921 8,884,563 5,266,621 660,313 6,650,851 8,435,465 5,705,803 3,936,663 639,236
age - Mean (SD) 29.49 (7.65) 77.63 (0.22) 70.39 (7.61) 91.31 (4.72) 41.94 (9.66) 35.89 (5.24) 77.30 (8.90) 75.70 (7.95) 3.40 (5.72)
Medications Amoxiciline N (%) 24,278 (48.4%) 68,965 (55.0%) 18,107 (23.9%) 46,174 (75.1%) 85,762 (23.4%) 50,265 (20.2%) 91,435 (41.3%) 37,294 (99.5%) 40,956 (40.8%)
Ibuprofen N (%) 2,951 (1.8%) 91,435 (19.4%) 82,945 (45.1%) 27,260 (87.4%) 38,841 (59.2%) 97,900 (62.4%) 69,992 (64.1%) 4,486 (73.2%) 17,312 (66.8%)
cohort2
number subjects - N 4,871,502 7,496,700 1,978,308 437,373 8,406,322 8,692,055 6,282,721 4,482,585 9,374,762
age - Mean (SD) 37.60 (7.24) 76.95 (7.07) 44.98 (6.54) 54.89 (6.69) 47.18 (0.73) 68.88 (2.86) 84.14 (5.69) 83.07 (4.03) 52.18 (0.95)
Medications Amoxiciline N (%) 89,597 (77.1%) 27,128 (45.8%) 52,948 (12.2%) 17,552 (96.3%) 29,375 (94.8%) 91,377 (93.4%) 91,953 (92.6%) 30,360 (65.8%) 53,567 (16.1%)
Ibuprofen N (%) 24,422 (68.1%) 94,818 (7.9%) 32,819 (28.1%) 2,318 (18.4%) 41,099 (89.7%) 89,002 (93.8%) 51,938 (90.8%) 53,369 (94.2%) 35,151 (28.0%)

We might also want to modify the default style of the table. For instance, we might want to highlight the cohort_name labels with a blue background, have the body text in red, and use a combination of orange and yellow for the header. We can do it with the style argument:

mock_sr |>  
  gtTable(
    style = list(
      "header" = list(gt::cell_text(weight = "bold"), 
                      gt::cell_fill(color = "orange")),
      "header_level" = list(gt::cell_text(weight = "bold"), 
                      gt::cell_fill(color = "yellow")),
      "column_name" = gt::cell_text(weight = "bold"),
      "group_label" = list(gt::cell_fill(color = "blue"),
                           gt::cell_text(color = "white", weight = "bold")),
      "body" = gt::cell_text(color = "red")
    ),
    groupNameCol = "cohort_name",
    groupNameAsColumn = FALSE,
    groupOrder = c("cohort1", "cohort2"),
    colsToMergeRows = "all_columns"
  )
Stratifications
variable_name variable_level estimate_name overall age_group and sex sex age_group
overall <40 and Male >=40 and Male <40 and Female >=40 and Female Male Female <40 >=40
cohort1
number subjects - N 1,535,921 8,884,563 5,266,621 660,313 6,650,851 8,435,465 5,705,803 3,936,663 639,236
age - Mean (SD) 29.49 (7.65) 77.63 (0.22) 70.39 (7.61) 91.31 (4.72) 41.94 (9.66) 35.89 (5.24) 77.30 (8.90) 75.70 (7.95) 3.40 (5.72)
Medications Amoxiciline N (%) 24,278 (48.4%) 68,965 (55.0%) 18,107 (23.9%) 46,174 (75.1%) 85,762 (23.4%) 50,265 (20.2%) 91,435 (41.3%) 37,294 (99.5%) 40,956 (40.8%)
Ibuprofen N (%) 2,951 (1.8%) 91,435 (19.4%) 82,945 (45.1%) 27,260 (87.4%) 38,841 (59.2%) 97,900 (62.4%) 69,992 (64.1%) 4,486 (73.2%) 17,312 (66.8%)
cohort2
number subjects - N 4,871,502 7,496,700 1,978,308 437,373 8,406,322 8,692,055 6,282,721 4,482,585 9,374,762
age - Mean (SD) 37.60 (7.24) 76.95 (7.07) 44.98 (6.54) 54.89 (6.69) 47.18 (0.73) 68.88 (2.86) 84.14 (5.69) 83.07 (4.03) 52.18 (0.95)
Medications Amoxiciline N (%) 89,597 (77.1%) 27,128 (45.8%) 52,948 (12.2%) 17,552 (96.3%) 29,375 (94.8%) 91,377 (93.4%) 91,953 (92.6%) 30,360 (65.8%) 53,567 (16.1%)
Ibuprofen N (%) 24,422 (68.1%) 94,818 (7.9%) 32,819 (28.1%) 2,318 (18.4%) 41,099 (89.7%) 89,002 (93.8%) 51,938 (90.8%) 53,369 (94.2%) 35,151 (28.0%)

fxTable()

To obtain a similar result but with a flextable object, we can use fxTable with the same arguments as before; however, style must be adapted to use the officer package, since that is the format accepted by flextable.

mock_sr |>  
  fxTable(
    style = list(
      "header" = list(
        "cell" = officer::fp_cell(background.color = "orange"),
        "text" = officer::fp_text(bold = TRUE)),
      "header_level" = list(
        "cell" = officer::fp_cell(background.color = "yellow"),
        "text" = officer::fp_text(bold = TRUE)),
      "column_name" = list("text" = officer::fp_text(bold = TRUE)),
      "group_label" = list(
        "cell" = officer::fp_cell(background.color = "blue"),
        "text" = officer::fp_text(bold = TRUE, color = "white")),
      "body" = list("text" = officer::fp_text(color = "red"))
    ),
    groupNameCol = "cohort_name",
    groupNameAsColumn = FALSE,
    groupOrder = c("cohort1", "cohort2"),
    colsToMergeRows = "all_columns"
  )

cohort_name

variable_name

variable_level

estimate_name

Stratifications

overall

age_group and sex

sex

age_group

overall

<40 and Male

>=40 and Male

<40 and Female

>=40 and Female

Male

Female

<40

>=40

cohort1

number subjects

-

N

1,535,921

8,884,563

5,266,621

660,313

6,650,851

8,435,465

5,705,803

3,936,663

639,236

age

-

Mean (SD)

29.49 (7.65)

77.63 (0.22)

70.39 (7.61)

91.31 (4.72)

41.94 (9.66)

35.89 (5.24)

77.30 (8.90)

75.70 (7.95)

3.40 (5.72)

Medications

Amoxiciline

N (%)

24,278 (48.4%)

68,965 (55.0%)

18,107 (23.9%)

46,174 (75.1%)

85,762 (23.4%)

50,265 (20.2%)

91,435 (41.3%)

37,294 (99.5%)

40,956 (40.8%)

Ibuprofen

N (%)

2,951 (1.8%)

91,435 (19.4%)

82,945 (45.1%)

27,260 (87.4%)

38,841 (59.2%)

97,900 (62.4%)

69,992 (64.1%)

4,486 (73.2%)

17,312 (66.8%)

cohort2

number subjects

-

N

4,871,502

7,496,700

1,978,308

437,373

8,406,322

8,692,055

6,282,721

4,482,585

9,374,762

age

-

Mean (SD)

37.60 (7.24)

76.95 (7.07)

44.98 (6.54)

54.89 (6.69)

47.18 (0.73)

68.88 (2.86)

84.14 (5.69)

83.07 (4.03)

52.18 (0.95)

Medications

Amoxiciline

N (%)

89,597 (77.1%)

27,128 (45.8%)

52,948 (12.2%)

17,552 (96.3%)

29,375 (94.8%)

91,377 (93.4%)

91,953 (92.6%)

30,360 (65.8%)

53,567 (16.1%)

Ibuprofen

N (%)

24,422 (68.1%)

94,818 (7.9%)

32,819 (28.1%)

2,318 (18.4%)

41,099 (89.7%)

89,002 (93.8%)

51,938 (90.8%)

53,369 (94.2%)

35,151 (28.0%)