Start by loading the required libraries.
library(ClinReport)
library(officer)
library(flextable)
library(dplyr)
library(reshape2)
library(nlme)
library(emmeans)
library(car)
Load your data.
# We will use fake data
data(datafake)
# Preview the first rows of the data set itself. `head(data)` would print the
# source code of the `data()` function instead of the data.
print(head(datafake))
#> (output omitted: run this chunk to display the first rows of datafake)
Create a statistical output for a quantitative response and two explicative variables, for example a treatment group and a time variable corresponding to the visits of a clinical trial.
To do that, use the report.quanti()
function:
# Descriptive statistics of y_numeric for each GROUP x TIMEPOINT combination.
# at.row separates the results of each timepoint; subjid is used to show the
# number of subjects per group in the column headers.
tab1 <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  x2 = "TIMEPOINT",
  at.row = "TIMEPOINT",
  subjid = "SUBJID"
)
tab1
#>
#> ############################################
#> Quantitative descriptive statistics of: y_numeric
#> ############################################
#>
#> TIMEPOINT Statistics A (N=30) B (N=21) C (N=17)
#> 1 D0 N 30 20 16
#> 2 D0 Mean (SD) -0.93(0.86) -0.67(1.09) -1.19(0.92)
#> 3 D0 Median -0.82 -0.69 -1.26
#> 4 D0 [Q1;Q3] [-1.59;-0.16] [-1.39;-0.06] [-1.62;-0.83]
#> 5 D0 [Min;Max] [-2.34;0.36] [-2.44;2.10] [-2.99;0.66]
#> 6 D0 Missing 1 1 0
#> 7
#> 8 D1 N 30 20 16
#> 9 D1 Mean (SD) 1.83(1.04) 4.17(1.28) 4.98(0.69)
#> 10 D1 Median 1.78 4.19 5.08
#> 11 D1 [Q1;Q3] [ 0.94; 2.54] [ 3.23; 4.92] [ 4.58; 5.46]
#> 12 D1 [Min;Max] [ 0.11;3.88] [ 1.48;6.19] [ 3.80;6.23]
#> 13 D1 Missing 1 0 0
#> 14
#> 15 D2 N 30 20 16
#> 16 D2 Mean (SD) 1.97(1.17) 4.04(0.89) 4.90(1.36)
#> 17 D2 Median 1.66 4.19 5.06
#> 18 D2 [Q1;Q3] [ 1.23; 2.86] [ 3.62; 4.36] [ 4.34; 5.20]
#> 19 D2 [Min;Max] [-0.18;4.36] [ 2.03;5.63] [ 2.39;7.96]
#> 20 D2 Missing 1 1 0
#> 21
#> 22 D3 N 30 20 16
#> 23 D3 Mean (SD) 1.78(1.17) 3.81(0.94) 5.07(1.12)
#> 24 D3 Median 1.78 3.63 5.22
#> 25 D3 [Q1;Q3] [ 0.93; 2.42] [ 3.13; 4.44] [ 4.11; 5.66]
#> 26 D3 [Min;Max] [-0.16;3.90] [ 2.46;6.01] [ 3.16;7.37]
#> 27 D3 Missing 0 1 1
#> 28
#> 29 D4 N 30 20 16
#> 30 D4 Mean (SD) 1.83(0.85) 3.80(0.95) 5.17(1.03)
#> 31 D4 Median 1.67 3.83 4.88
#> 32 D4 [Q1;Q3] [ 1.26; 2.32] [ 3.12; 4.42] [ 4.69; 5.50]
#> 33 D4 [Min;Max] [ 0.38;3.97] [ 2.31;5.41] [ 3.24;6.96]
#> 34 D4 Missing 1 1 1
#> 35
#> 36 D5 N 30 20 16
#> 37 D5 Mean (SD) 2.27(1.20) 3.64(1.19) 4.43(0.98)
#> 38 D5 Median 2.50 3.86 4.57
#> 39 D5 [Q1;Q3] [ 1.77; 3.21] [ 2.59; 4.60] [ 3.44; 4.97]
#> 40 D5 [Min;Max] [-1.19;4.31] [ 0.91;5.12] [ 2.95;6.54]
#> 41 D5 Missing 0 0 0
#>
#> ############################################
The at.row
argument is used to insert a blank row between the results of each visit, and the subjid
argument is used
to add to the column headers the total number of subjects randomized in each treatment group.
We usually also want the corresponding graphic. You can use the dedicated plot function to draw the graphic that matches your table:
# Build the graphic that matches the descriptive table, then display it
g1 <- plot(
  tab1,
  title = "The title that you want to display"
)
print(g1)
You can modify the plot by using the following arguments of the plot.desc()
function:
# Show the arguments of plot.desc, looked up directly in the ClinReport namespace
args(utils::getFromNamespace("plot.desc", "ClinReport"))
#> function (x, ..., title = "", ylim = NULL, xlim = NULL, xlab = "",
#> ylab = "", legend.label = "Group", add.sd = F, add.ci = F,
#> size.title = 10, add.line = T)
#> NULL
Then we can use the report.doc()
function, which uses the flextable package to format
the output into a flextable
object, ready to export to Microsoft Word
with the officer package.
The table will look like this (we can preview it in HTML, just to check):
# Preview the formatted table; init.numbering = TRUE restarts the output numbering
report.doc(
  tab1,
  title = "Quantitative statistics (2 explicative variables)",
  colspan.value = "Treatment group",
  init.numbering = TRUE
)
Table 1: Quantitative statistics (2 explicative variables) | ||||
Treatment group | ||||
TIMEPOINT | Statistics | A (N=30) | B (N=21) | C (N=17) |
D0 | N | 30 | 20 | 16 |
Mean (SD) | -0.93(0.86) | -0.67(1.09) | -1.19(0.92) | |
Median | -0.82 | -0.69 | -1.26 | |
[Q1;Q3] | [-1.59;-0.16] | [-1.39;-0.06] | [-1.62;-0.83] | |
[Min;Max] | [-2.34;0.36] | [-2.44;2.10] | [-2.99;0.66] | |
Missing | 1 | 1 | 0 | |
D1 | N | 30 | 20 | 16 |
Mean (SD) | 1.83(1.04) | 4.17(1.28) | 4.98(0.69) | |
Median | 1.78 | 4.19 | 5.08 | |
[Q1;Q3] | [ 0.94; 2.54] | [ 3.23; 4.92] | [ 4.58; 5.46] | |
[Min;Max] | [ 0.11;3.88] | [ 1.48;6.19] | [ 3.80;6.23] | |
Missing | 1 | 0 | 0 | |
D2 | N | 30 | 20 | 16 |
Mean (SD) | 1.97(1.17) | 4.04(0.89) | 4.90(1.36) | |
Median | 1.66 | 4.19 | 5.06 | |
[Q1;Q3] | [ 1.23; 2.86] | [ 3.62; 4.36] | [ 4.34; 5.20] | |
[Min;Max] | [-0.18;4.36] | [ 2.03;5.63] | [ 2.39;7.96] | |
Missing | 1 | 1 | 0 | |
D3 | N | 30 | 20 | 16 |
Mean (SD) | 1.78(1.17) | 3.81(0.94) | 5.07(1.12) | |
Median | 1.78 | 3.63 | 5.22 | |
[Q1;Q3] | [ 0.93; 2.42] | [ 3.13; 4.44] | [ 4.11; 5.66] | |
[Min;Max] | [-0.16;3.90] | [ 2.46;6.01] | [ 3.16;7.37] | |
Missing | 0 | 1 | 1 | |
D4 | N | 30 | 20 | 16 |
Mean (SD) | 1.83(0.85) | 3.80(0.95) | 5.17(1.03) | |
Median | 1.67 | 3.83 | 4.88 | |
[Q1;Q3] | [ 1.26; 2.32] | [ 3.12; 4.42] | [ 4.69; 5.50] | |
[Min;Max] | [ 0.38;3.97] | [ 2.31;5.41] | [ 3.24;6.96] | |
Missing | 1 | 1 | 1 | |
D5 | N | 30 | 20 | 16 |
Mean (SD) | 2.27(1.20) | 3.64(1.19) | 4.43(0.98) | |
Median | 2.50 | 3.86 | 4.57 | |
[Q1;Q3] | [ 1.77; 3.21] | [ 2.59; 4.60] | [ 3.44; 4.97] | |
[Min;Max] | [-1.19;4.31] | [ 0.91;5.12] | [ 2.95;6.54] | |
Missing | 0 | 0 | 0 | |
The output number is incremented automatically after each call to the function report.doc()
.
You can restart the numbering of the outputs by passing the init.numbering=TRUE
argument to the report.doc()
function.
Finally, we add those results to an rdocx
object:
# Start from a new, empty Word document and add the formatted table to it
doc <- report.doc(
  tab1,
  title = "Quantitative statistics (2 explicative variables)",
  colspan.value = "Treatment group",
  doc = read_docx(),
  init.numbering = TRUE
)
# Then add the graphic to the same document
doc <- body_add_gg(doc, value = g1, style = "centered")
Write the doc to a docx file:
# Build a temporary file path ending in .docx and write the document to it
file <- paste0(tempfile(), ".docx")
print(doc, target = file)
# Open it
# shell.exec(file)
An example of qualitative statistics with one explicative variable
# Frequencies of y_logistic for each level of VAR, plus a Total column
# (total = TRUE). subjid is used for the subject counts in the column headers.
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "VAR",
  total = TRUE,
  subjid = "SUBJID"
)
# The title names the single explicative variable used here, in line with the
# titles of the quantitative examples.
report.doc(
  tab,
  title = "Qualitative table with one explicative variable",
  colspan.value = "A variable"
)
Table 2: Qualitative table with one explicative variable | ||||
A variable | ||||
Levels | Statistics | Cat 1 (N=65) | Cat 2 (N=63) | Total (N=128) |
0 | n (column %) | 100(48.08%) | 86(45.74%) | 186(46.97%) |
1 | n (column %) | 103(49.52%) | 97(51.60%) | 200(50.51%) |
Missing n(%) | 5(2.40%) | 5(2.66%) | 10(2.53%) | |
An example of qualitative statistics with two explicative variables
# Frequencies of y_logistic by GROUP within each TIMEPOINT, with a Total column
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  x2 = "TIMEPOINT",
  at.row = "TIMEPOINT",
  total = TRUE,
  subjid = "SUBJID"
)
# Format the table with a spanning header above the group columns
report.doc(
  tab,
  title = "Qualitative table with two variables",
  colspan.value = "Treatment group"
)
Table 3: Qualitative table with two variables | ||||||
Treatment group | ||||||
TIMEPOINT | Levels | Statistics | A (N=30) | B (N=21) | C (N=17) | Total (N=68) |
D0 | 0 | n (column %) | 11(36.67%) | 11(55.00%) | 7(43.75%) | 29(43.94%) |
1 | n (column %) | 18(60.00%) | 8(40.00%) | 7(43.75%) | 33(50.00%) | |
Missing n(%) | 1(3.33%) | 1(5.00%) | 2(12.50%) | 4(6.06%) | ||
D1 | 0 | n (column %) | 7(23.33%) | 13(65.00%) | 8(50.00%) | 28(42.42%) |
1 | n (column %) | 21(70.00%) | 7(35.00%) | 7(43.75%) | 35(53.03%) | |
Missing n(%) | 2(6.67%) | 0(0%) | 1(6.25%) | 3(4.55%) | ||
D2 | 0 | n (column %) | 18(60.00%) | 7(35.00%) | 11(68.75%) | 36(54.55%) |
1 | n (column %) | 12(40.00%) | 13(65.00%) | 5(31.25%) | 30(45.45%) | |
Missing n(%) | 0(0%) | 0(0%) | 0(0%) | 0(0%) | ||
D3 | 0 | n (column %) | 11(36.67%) | 10(50.00%) | 7(43.75%) | 28(42.42%) |
1 | n (column %) | 19(63.33%) | 10(50.00%) | 9(56.25%) | 38(57.58%) | |
Missing n(%) | 0(0%) | 0(0%) | 0(0%) | 0(0%) | ||
D4 | 0 | n (column %) | 18(60.00%) | 12(60.00%) | 6(37.50%) | 36(54.55%) |
1 | n (column %) | 12(40.00%) | 8(40.00%) | 8(50.00%) | 28(42.42%) | |
Missing n(%) | 0(0%) | 0(0%) | 2(12.50%) | 2(3.03%) | ||
D5 | 0 | n (column %) | 14(46.67%) | 7(35.00%) | 8(50.00%) | 29(43.94%) |
1 | n (column %) | 15(50.00%) | 13(65.00%) | 8(50.00%) | 36(54.55%) | |
Missing n(%) | 1(3.33%) | 0(0%) | 0(0%) | 1(1.52%) | ||
An example of quantitative statistics with one explicative variable
# Descriptive statistics of y_numeric for each level of VAR, with a Total column
tab <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "VAR",
  total = TRUE,
  subjid = "SUBJID"
)
# Format the table with a spanning header above the VAR columns
report.doc(
  tab,
  title = "Quantitative table with one explicative variable",
  colspan.value = "A variable"
)
Table 4: Quantitative table with one explicative variable | |||
A variable | |||
Statistics | Cat 1 (N=65) | Cat 2 (N=63) | Total (N=128) |
N | 208 | 188 | 396 |
Mean (SD) | 2.55(2.18) | 2.56(2.23) | 2.56(2.20) |
Median | 2.64 | 2.79 | 2.71 |
[Q1;Q3] | [0.94;4.36] | [1.07;4.19] | [1.04;4.33] |
[Min;Max] | [-2.39;6.43] | [-2.99;7.96] | [-2.99;7.96] |
Missing | 4 | 6 | 10 |
An example of quantitative statistics with two explicative variables
# Descriptive statistics of y_numeric by GROUP within each TIMEPOINT,
# with a Total column
tab <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  x2 = "TIMEPOINT",
  at.row = "TIMEPOINT",
  total = TRUE,
  subjid = "SUBJID"
)
# Format the table with a spanning header above the group columns
report.doc(
  tab,
  title = "Quantitative table with two explicative variables",
  colspan.value = "Treatment group"
)
Table 5: Quantitative table with two explicative variables | |||||
Treatment group | |||||
TIMEPOINT | Statistics | A (N=30) | B (N=21) | C (N=17) | Total (N=68) |
D0 | N | 30 | 20 | 16 | 66 |
Mean (SD) | -0.93(0.86) | -0.67(1.09) | -1.19(0.92) | -0.92(0.95) | |
Median | -0.82 | -0.69 | -1.26 | -0.86 | |
[Q1;Q3] | [-1.59;-0.16] | [-1.39;-0.06] | [-1.62;-0.83] | [-1.55;-0.16] | |
[Min;Max] | [-2.34;0.36] | [-2.44;2.10] | [-2.99;0.66] | [-2.99;2.10] | |
Missing | 1 | 1 | 0 | 2 | |
D1 | N | 30 | 20 | 16 | 66 |
Mean (SD) | 1.83(1.04) | 4.17(1.28) | 4.98(0.69) | 3.33(1.73) | |
Median | 1.78 | 4.19 | 5.08 | 3.57 | |
[Q1;Q3] | [ 0.94; 2.54] | [ 3.23; 4.92] | [ 4.58; 5.46] | [ 1.78; 4.91] | |
[Min;Max] | [ 0.11;3.88] | [ 1.48;6.19] | [ 3.80;6.23] | [ 0.11;6.23] | |
Missing | 1 | 0 | 0 | 1 | |
D2 | N | 30 | 20 | 16 | 66 |
Mean (SD) | 1.97(1.17) | 4.04(0.89) | 4.90(1.36) | 3.32(1.70) | |
Median | 1.66 | 4.19 | 5.06 | 3.57 | |
[Q1;Q3] | [ 1.23; 2.86] | [ 3.62; 4.36] | [ 4.34; 5.20] | [ 1.89; 4.44] | |
[Min;Max] | [-0.18;4.36] | [ 2.03;5.63] | [ 2.39;7.96] | [-0.18;7.96] | |
Missing | 1 | 1 | 0 | 2 | |
D3 | N | 30 | 20 | 16 | 66 |
Mean (SD) | 1.78(1.17) | 3.81(0.94) | 5.07(1.12) | 3.15(1.75) | |
Median | 1.78 | 3.63 | 5.22 | 3.15 | |
[Q1;Q3] | [ 0.93; 2.42] | [ 3.13; 4.44] | [ 4.11; 5.66] | [ 1.80; 4.39] | |
[Min;Max] | [-0.16;3.90] | [ 2.46;6.01] | [ 3.16;7.37] | [-0.16;7.37] | |
Missing | 0 | 1 | 1 | 2 | |
D4 | N | 30 | 20 | 16 | 66 |
Mean (SD) | 1.83(0.85) | 3.80(0.95) | 5.17(1.03) | 3.22(1.66) | |
Median | 1.67 | 3.83 | 4.88 | 3.16 | |
[Q1;Q3] | [ 1.26; 2.32] | [ 3.12; 4.42] | [ 4.69; 5.50] | [ 1.69; 4.48] | |
[Min;Max] | [ 0.38;3.97] | [ 2.31;5.41] | [ 3.24;6.96] | [ 0.38;6.96] | |
Missing | 1 | 1 | 1 | 3 | |
D5 | N | 30 | 20 | 16 | 66 |
Mean (SD) | 2.27(1.20) | 3.64(1.19) | 4.43(0.98) | 3.21(1.45) | |
Median | 2.50 | 3.86 | 4.57 | 3.28 | |
[Q1;Q3] | [ 1.77; 3.21] | [ 2.59; 4.60] | [ 3.44; 4.97] | [ 2.42; 4.44] | |
[Min;Max] | [-1.19;4.31] | [ 0.91;5.12] | [ 2.95;6.54] | [-1.19;6.54] | |
Missing | 0 | 0 | 0 | 0 | |
You can mix qualitative and quantitative outputs.
But this is only possible with one explicative variable, and it must be the same variable for both responses:
# Quantitative statistics of y_numeric by GROUP
tab1 <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  subjid = "SUBJID",
  y.label = "Y numeric"
)
# Qualitative statistics of y_logistic by the same explicative variable
tab2 <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  subjid = "SUBJID",
  y.label = "Y logistic"
)
# Stack both tables into one; rbind.label names the column that holds the
# response labels
tab3 <- regroup(tab1, tab2, rbind.label = "The label of your choice")
report.doc(
  tab3,
  title = "Mixed Qualitative and Quantitative outputs",
  colspan.value = "Treatment group"
)
Table 6: Mixed Qualitative and Quantitative outputs | |||||
Treatment group | |||||
The label of your choice | Levels | Statistics | A (N=30) | B (N=21) | C (N=17) |
Y numeric | N | 180 | 120 | 96 | |
Mean (SD) | 1.46(1.50) | 3.15(2.00) | 3.87(2.52) | ||
Median | 1.59 | 3.75 | 4.73 | ||
[Q1;Q3] | [0.45;2.50] | [2.46;4.44] | [3.44;5.30] | ||
[Min;Max] | [-2.34;4.36] | [-2.44;6.19] | [-2.99;7.96] | ||
Missing | 4 | 4 | 2 | ||
Y logistic | 0 | n (column %) | 79(43.89%) | 60(50.00%) | 47(48.96%) |
1 | n (column %) | 97(53.89%) | 59(49.17%) | 44(45.83%) | |
Missing n(%) | 4(2.22%) | 1(0.83%) | 5(5.21%) | ||
Reporting an anova table is basically a call
to the function xtable_to_flextable()
. The function
report.doc()
only handles the numbering of the output
and the header with the title.
# Remove the baseline visit (D0) before fitting the model and drop the unused
# factor levels
data.mod <- droplevels(datafake[datafake$TIMEPOINT != "D0", ])
# Linear mixed model with a random intercept per subject; rows with missing
# values are omitted
mod <- lme(
  y_numeric ~ baseline + GROUP + TIMEPOINT + GROUP * TIMEPOINT,
  random = ~ 1 | SUBJID,
  data = data.mod,
  na.action = na.omit
)
# Type 3 Anova table of the fitted model
anov3 <- Anova(mod, type = 3)
# The title describes the Anova table; the previous title was a leftover from
# the mixed qualitative/quantitative example.
report.doc(anov3, title = "Anova table (type III tests) of the mixed model")
Table 7: Anova table (type III tests) of the mixed model | |||
Chisq | Df | Pr(>Chisq) | |
(Intercept) | 84.699 | 1.000 | 0.000 |
baseline | 1.695 | 1.000 | 0.193 |
GROUP | 107.561 | 2.000 | 0.000 |
TIMEPOINT | 4.426 | 4.000 | 0.351 |
GROUP:TIMEPOINT | 11.671 | 8.000 | 0.166 |
LS-means reporting is based on the package emmeans.
The function report.lsmeans()
lets you format the output:
# LS-means of each GROUP, computed separately within each TIMEPOINT
lsm <- emmeans(mod, ~ GROUP | TIMEPOINT)
# Format the LS-means, separating the results of each timepoint
tab <- report.lsmeans(lsm, at.row = "TIMEPOINT")
report.doc(
  tab,
  title = "LS-Means example",
  colspan.value = "Treatment Group"
)
Table 8: LS-Means example | ||||
Treatment Group | ||||
TIMEPOINT | Statistics | A | B | C |
D1 | Estimate (SE) | 1.81(0.20) | 4.17(0.24) | 5.00(0.27) |
95% CI | [1.41;2.22] | [3.69;4.65] | [4.46;5.54] | |
P-value | <0.001 | <0.001 | <0.001 | |
D2 | Estimate (SE) | 1.96(0.20) | 4.05(0.25) | 4.90(0.27) |
95% CI | [1.56;2.36] | [3.56;4.55] | [4.36;5.44] | |
P-value | <0.001 | <0.001 | <0.001 | |
D3 | Estimate (SE) | 1.79(0.20) | 3.79(0.25) | 5.08(0.28) |
95% CI | [1.39;2.18] | [3.29;4.28] | [4.52;5.63] | |
P-value | <0.001 | <0.001 | <0.001 | |
D4 | Estimate (SE) | 1.83(0.20) | 3.80(0.25) | 5.17(0.28) |
95% CI | [1.43;2.23] | [3.31;4.30] | [4.62;5.73] | |
P-value | <0.001 | <0.001 | <0.001 | |
D5 | Estimate (SE) | 2.28(0.20) | 3.64(0.24) | 4.42(0.27) |
95% CI | [1.89;2.68] | [3.15;4.12] | [3.88;4.96] | |
P-value | <0.001 | <0.001 | <0.001 | |
Contrasts between LS-means are reported in the same way:
# Compare each treatment group with the reference group A
contr <- contrast(lsm, "trt.vs.ctrl", ref = "A")
# Here there is only one explicative variable.
# The argument is spelled out as at.row (as in the LS-means example) instead of
# relying on partial matching of `at`.
# NOTE(review): assumes `at` was resolving to at.row -- confirm against the
# report.lsmeans() signature.
tab.contr <- report.lsmeans(lsm = contr, at.row = "TIMEPOINT")
report.doc(
  tab.contr,
  title = "LS-Means contrast example",
  colspan.value = "Contrasts"
)
Table 9: LS-Means contrast example | |||
Contrasts | |||
TIMEPOINT | Statistics | B - A | C - A |
D1 | Estimate (SE) | 2.36(0.31) | 3.19(0.34) |
95% CI | [1.66;3.06] | [2.44;3.94] | |
P-value | <0.001 | <0.001 | |
D2 | Estimate (SE) | 2.10(0.32) | 2.94(0.34) |
95% CI | [1.38;2.81] | [2.19;3.69] | |
P-value | <0.001 | <0.001 | |
D3 | Estimate (SE) | 2.00(0.32) | 3.29(0.34) |
95% CI | [1.29;2.71] | [2.53;4.05] | |
P-value | <0.001 | <0.001 | |
D4 | Estimate (SE) | 1.97(0.32) | 3.34(0.34) |
95% CI | [1.26;2.68] | [2.58;4.11] | |
P-value | <0.001 | <0.001 | |
D5 | Estimate (SE) | 1.35(0.31) | 2.14(0.33) |
95% CI | [0.66;2.05] | [1.39;2.88] | |
P-value | <0.001 | <0.001 | |
library(survival)
# Load the time-to-cure example data and fit a Cox model with Group as the
# only covariate
data(time_to_cure)
fit <- coxph(
  Surv(time, status) ~ Group,
  data = time_to_cure
)
# Estimated marginal means per Group, requested on the response scale
em <- emmeans(fit, ~ Group, type = "response")
# Pairwise comparisons without multiplicity adjustment, leaving out the
# "Untreated" group
pairs <- pairs(em, adjust = "none", exclude = "Untreated")
# Format the comparisons and print them to the console
tab.pairs <- report.lsmeans(pairs)
tab.pairs
#>
#> ############################################
#> LS-Means comparisons of: time
#> ############################################
#>
#> Statistics Group A / Group B Group A / Group C Group B / Group C
#> 1 Estimate (SE) 0.66(0.25) 0.49(0.19) 0.74(0.27)
#> 2 95% CI [0.31;1.39] [0.23;1.04] [0.36;1.51]
#> 3 P-value 0.270 0.060 0.410
#>
#> ############################################
# Format the pairwise comparisons as a numbered, titled table
report.doc(
  tab.pairs,
  title = "Hazard ratios of a Cox model"
)
Table 10: Hazard ratios of a Cox model | |||
Statistics | Group A / Group B | Group A / Group C | Group B / Group C |
Estimate (SE) | 0.66(0.25) | 0.49(0.19) | 0.74(0.27) |
95% CI | [0.31;1.39] | [0.23;1.04] | [0.36;1.51] |
P-value | 0.270 | 0.060 | 0.410 |