Application
# clinical features
X <- read.csv("data/PPMI_Baseline_Data_02Jul2018.csv",row.names="PATNO",na.strings=c(".",""))
colnames(X) <- tolower(colnames(X))
X <- X[X$apprdx==1,] # Parkinson's disease
X[c("site","apprdx","event_id","symptom5_comment")] <- NULL
for(i in seq_len(ncol(X))){
if(is.factor(X[[i]])){levels(X[[i]]) <- paste0("-",levels(X[[i]]))}
}
100*mean(is.na(X)) # proportion missingness
x <- lapply(seq_len(1),function(x) missRanger::missRanger(data=X,pmm.k=3,
num.trees=100,verbose=0,seed=1))
x <- lapply(x,function(x) model.matrix(~.-1,data=x))
# genomic features
load("data/ppmi_rnaseq_bl_pd_hc-2019-01-11.Rdata",verbose=TRUE)
counts <- t(ppmi_rnaseq_bl_pdhc)
mean(grepl(pattern="ENSG0000|ENSGR0000",x=colnames(counts)))
cond <- apply(counts,2,function(x) sd(x)>0) & !grepl(pattern="ENSG0000|ENSGR0000",x=colnames(counts))
Z <- palasso:::.prepare(counts[,cond],filter=10,cutoff="knee")$X
# outcome
Y <- read.csv("data/PPMI_Year_1-3_Data_02Jul2018.csv",na.strings=".")
Y <- Y[Y$APPRDX==1 & Y$EVENT_ID %in% c("V04","V06","V08"),]
colnames(Y)[colnames(Y)=="updrs_totscore"] <- "updrs"
vars <- c("moca","quip","updrs","gds","scopa","ess","bjlot","rem")
# too few levels: "NP1HALL","NP1DPRS"
Y <- Y[,c("EVENT_ID","PATNO",vars)]
Y <- reshape(Y,idvar="PATNO",timevar="EVENT_ID",direction="wide")
rownames(Y) <- Y$PATNO; Y$PATNO <- NULL
# overlap
names <- Reduce(intersect,list(rownames(X),rownames(Y),rownames(Z)))
Z <- Z[names,]
Y <- Y[names,]
Y <- sapply(vars,function(x) Y[,grepl(pattern=x,x=colnames(Y))],simplify=FALSE)
for(i in seq_along(Y)){
colnames(Y[[i]]) <- c("V04","V06","V08")
}
x <- lapply(x,function(x) x[names,]); rm(names)
X <- x[[1]]; rm(x) # impute multiple times!
# inversion for positive correlation
Y$moca <- -Y$moca # "wrong" sign
Y$bjlot <- -Y$bjlot # "wrong" sign
sapply(Y,function(x) range(unlist(x),na.rm=TRUE))
save(Y,X,Z,file="results/data.RData")
writeLines(text=capture.output(utils::sessionInfo(),cat("\n"),sessioninfo::session_info()),con="results/info_dat.txt")
load("results/data.RData",verbose=TRUE)
#grDevices::pdf(file="manuscript/figure_COR.pdf",width=6,height=3)
grDevices::postscript(file="manuscript/figure_COR.eps",width=6,height=3)
graphics::par(mar=c(0.5,3,2,0.5))
graphics::layout(mat=matrix(c(1,2),nrow=1,ncol=2),width=c(0.2,0.8))
# correlation between years
cor <- cbind(sapply(Y,function(x) cor(x[,1],x[,2],use="complete.obs",method="spearman")),
sapply(Y,function(x) cor(x[,2],x[,3],use="complete.obs",method="spearman")),
sapply(Y,function(x) cor(x[,1],x[,3],use="complete.obs",method="spearman")))
colnames(cor) <- c("1-2","2-3","1-3")
cor <- rowMeans(cor)
joinet:::plot.matrix(cor,range=c(-3,3),margin=1,cex=0.7)
# correlation between variables
cor <- 1/3*cor(sapply(Y,function(x) x[,1]),use="complete.obs",method="spearman")+
1/3*cor(sapply(Y,function(x) x[,2]),use="complete.obs",method="spearman")+
1/3*cor(sapply(Y,function(x) x[,3]),use="complete.obs",method="spearman")
joinet:::plot.matrix(cor,range=c(-3,3),margin=c(1,2),cex=0.7)
grDevices::dev.off()
# other information
sapply(Y,colMeans,na.rm=TRUE) # increasing values
sapply(Y,function(x) apply(x,2,sd,na.rm=TRUE)) # increasing variance
sapply(Y,function(x) colSums(is.na(x))) # increasing numbers of NAs
#<<start>>
set.seed(1)
load("results/data.RData",verbose=TRUE)
set.seed(1)
foldid.ext <- rep(1:5,length.out=nrow(Y$moca))
foldid.int <- rep(rep(1:10,each=5),length.out=nrow(Y$moca))
table(foldid.ext,foldid.int)
#- - - - - - - - - - - - -
#- - internal coaching - -
#- - - - - - - - - - - - -
table <- list()
table$alpha <- c("lasso","ridge")
table$data <- c("clinic","omics","both")
table$var <- names(Y)
table <- rev(expand.grid(table,stringsAsFactors=FALSE))
loss <- fit <- list()
for(i in seq_len(nrow(table))){
cat(rep("*",times=5),"setting",i,rep("*",times=5),"\n")
y <- Y[[table$var[i]]]
x <- list(clinic=X,omics=Z,both=cbind(X,Z))[[table$data[i]]]
alpha <- 1*(table$alpha[i]=="lasso")
loss[[i]] <- cv.joinet(Y=y,X=x,alpha.base=alpha,foldid.ext=foldid.ext,
foldid.int=foldid.int) # add joinet::
#fit[[i]] <- joinet(Y=y,X=x,alpha.base=alpha,foldid=foldid.int)
}
save(table,loss,file="results/internal.RData")
#- - - - - - - - - - - - -
#- - external coaching - -
#- - - - - - - - - - - - -
table <- list()
temp <- utils::combn(x=names(Y),m=2)
table$comb <- paste0(temp[1,],"-",temp[2,])
table$step <- c("V04","V06","V08")
table$alpha <- c("lasso","ridge")
table$data <- c("clinic","omics","both")
table <- rev(expand.grid(table,stringsAsFactors=FALSE))
temp <- strsplit(table$comb,split="-"); table$comb <- NULL
table$var1 <- sapply(temp,function(x) x[[1]])
table$var2 <- sapply(temp,function(x) x[[2]])
loss <- fit <- list()
for(i in seq_len(nrow(table))){
cat(rep("*",times=5),"setting",i,rep("*",times=5),"\n")
y <- cbind(Y[[table$var1[i]]][,table$step[i]],
Y[[table$var2[i]]][,table$step[i]])
x <- list(clinic=X,omics=Z,both=cbind(X,Z))[[table$data[i]]]
alpha <- 1*(table$alpha[i]=="lasso")
loss[[i]] <- cv.joinet(Y=y,X=x,alpha.base=alpha,
foldid.ext=foldid.ext,foldid.int=foldid.int) # add joinet::
#fit[[i]] <- joinet(Y=y,X=x,alpha.base=alpha,foldid=foldid.int)
}
save(table,loss,file="results/external.RData")
writeLines(text=capture.output(utils::sessionInfo(),cat("\n"),sessioninfo::session_info()),con="results/info_app.txt")
load("results/internal.RData")
# standardised loss
vars <- unique(table$var)
former <- t(sapply(loss,function(x) x["base",]))
min <- sapply(vars,function(x) min(former[table$var==x,]))
max <- sapply(vars,function(x) max(former[table$var==x,]))
index <- match(x=table$var,table=vars)
former <- (former-min[index])/(max[index]-min[index])
dimnames(former) <- list(table$var,seq_len(3))
# percentage change
change <- t(sapply(loss,function(x) 100*(x["meta",]-x["base",])/x["base",]))
dimnames(change) <- list(table$var,c("1st","2nd","3rd"))
# overview
#grDevices::pdf(file="manuscript/figure_INT.pdf",width=6,height=3,pointsize=12)
grDevices::postscript(file="manuscript/figure_INT.eps",width=6,height=3,pointsize=12)
graphics::par(mfrow=c(2,3),mar=c(0.1,3,2,0.1),oma=c(0,1.1,1,0))
for(alpha in c("lasso","ridge")){
for(data in c("clinic","omics","both")){
cond <- table$alpha==alpha & table$data==data
joinet:::plot.matrix(X=change[cond,],range=c(-50,50),cex=0.7)
#graphics::title(main=paste0(alpha,"-",data),col.main="red",line=0) # check
if(alpha=="lasso"){graphics::mtext(text=data,side=3,line=1.5,cex=0.8)}
if(data=="clinic"){graphics::mtext(text=alpha,side=2,line=3,cex=0.8)}
}
}
grDevices::dev.off()
TEMP <- tapply(X=rowMeans(change),INDEX=table$var,FUN=mean)[vars]
mean(change<0)
round(tapply(X=rowMeans(change),INDEX=table$data,FUN=mean),digits=2)
round(tapply(X=rowMeans(change),INDEX=table$alpha,FUN=mean),digits=2)
round(colMeans(change),digits=2)
load("results/external.RData")
data <- c("clinic","omics","both")
alpha <- c("lasso","ridge")
step <- c("V04","V06","V08")
# percentage change
change <- t(sapply(loss,function(x) 100*(x["meta",]-x["base",])/x["base",]))
# overview
vars <- unique(c(table$var1,table$var2))
temp <- matrix(NA,nrow=length(vars),ncol=length(vars),dimnames=list(vars,vars))
array <- array(data=list(temp),dim=c(3,2,3),dimnames=list(data,alpha,step))
#grDevices::pdf(file="manuscript/figure_EXT.pdf",width=7.5,height=10,pointsize=14)
grDevices::postscript(file="manuscript/figure_EXT.eps",width=7.5,height=10,pointsize=14)
graphics::par(mfrow=c(6,3),mar=c(0.1,2.5,2.5,0.1),oma=c(0,1,2,0))
for(i in data){
for(j in alpha){
for(k in step){
cond <- table$data==i & table$alpha==j & table$step==k
array[i,j,k][[1]][cbind(table$var1,table$var2)[cond,]] <- change[cond,1]
array[i,j,k][[1]][cbind(table$var2,table$var1)[cond,]] <- change[cond,2]
joinet:::plot.matrix(array[i,j,k][[1]],margin=0,las=2,range=c(-20,20),cex=0.6)
#graphics::title(main=paste0(i,"-",j,"-",k),col.main="red",line=0) # check
if(i=="clinic" & j=="lasso"){graphics::mtext(text=ifelse(k=="V04","1st",ifelse(k=="V06","2nd","3rd")),side=3,line=2.5,cex=0.8)}
if(k=="V04"){graphics::mtext(text=paste0(i,"-",j),side=2,line=2.5,cex=0.8)}
}
}
}
grDevices::dev.off()
# check
i <- sample(seq_len(nrow(table)),size=1)
table[i,]
x <- loss[[i]]
100*(x["meta",]-x["base",])/x["base",]
#grDevices::pdf(file="manuscript/figure_ALL.pdf",height=3,width=6)
grDevices::postscript(file="manuscript/figure_ALL.eps",height=3,width=6)
graphics::par(mar=c(0.5,3,2,0.5))
graphics::layout(mat=matrix(c(1,2),nrow=1,ncol=2),width=c(0.2,0.8))
joinet:::plot.matrix(as.matrix(TEMP),margin=1,las=1,range=c(-20,20),cex=0.7)
sum(unlist(array)<0,na.rm=TRUE)/sum(!is.na(unlist(array)))
means <- apply(array,c(1,2,3),function(x) mean(x[[1]],na.rm=TRUE))
lapply(seq_len(3),function(x) apply(means,x,mean))
mean <- 1/length(array)*Reduce(f="+",x=array)
joinet:::plot.matrix(mean,margin=1,las=1,range=c(-20,20),cex=0.7)
# rows: target variable, columns: coaching variable
grDevices::dev.off()
#grDevices::pdf(file="manuscript/figure_DIF.pdf",height=1.2,width=5)
grDevices::postscript(file="manuscript/figure_DIF.eps",height=1.2,width=5)
load("results/internal.RData")
vars <- unique(table$var)
base <- t(sapply(loss,function(x) 100*(x["base",]-x["none",])/x["none",]))
meta <- t(sapply(loss,function(x) 100*(x["meta",]-x["none",])/x["none",]))
dimnames(meta) <- dimnames(base) <- list(table$var,c("1st","2nd","3rd"))
standard <- tapply(X=rowMeans(base),INDEX=table$var,FUN=mean)[vars]
internal <- tapply(X=rowMeans(meta),INDEX=table$var,FUN=mean)[vars]
load("results/external.RData")
vars <- unique(c(table$var1,table$var2))
base <- meta <- list()
for(i in seq_len(2)){
base[[i]] <- sapply(loss,function(x) 100*(x["base",i]-x["none",i])/x["none",i])
meta[[i]] <- sapply(loss,function(x) 100*(x["meta",i]-x["none",i])/x["none",i])
}
index <- c(table$var1,table$var2)
base <- unlist(base); meta <- unlist(meta)
#standard <- tapply(X=base,INDEX=index,FUN=mean)[vars]
external <- tapply(X=meta,INDEX=index,FUN=mean)[vars]
matrix <- round(rbind(standard,internal,external),digits=2)
rownames(matrix) <- c("","","")
graphics::par(mfrow=c(1,1),mar=c(0.5,3,1.5,1))
joinet:::plot.matrix(matrix,margin=c(1,2),las=1,range=c(-100,0),cex=0.7,digits=3)
grDevices::dev.off()