## ----style, eval=TRUE, echo=FALSE, results="asis"---------------------------
# Emit the BiocStyle LaTeX set-up; this chunk is knitted with results="asis".
BiocStyle::latex(
  width = 78,
  use.unsrturl = FALSE
)
## ----echo=FALSE-------------------------------------------------------------
library(ccrepe)
## ----eval=FALSE-------------------------------------------------------------
# sim.score.args = list(method="spearman", use="complete.obs")
## ----eval=FALSE-------------------------------------------------------------
# ccrepe(
# x = NA,
# y = NA,
# sim.score = cor,
# sim.score.args = list(),
# min.subj = 20,
# iterations = 1000,
# subset.cols.x = NULL,
# subset.cols.y = NULL,
# errthresh = 1e-04,
# verbose = FALSE,
# iterations.gap = 100,
# distributions = NA,
# compare.within.x = TRUE,
# concurrent.output = NA,
# make.output.table = FALSE)
## ---------------------------------------------------------------------------
# Simulate log-normal values for 10 samples (rows) and 4 features (columns).
data <- matrix(rlnorm(40, meanlog = 0, sdlog = 1), nrow = 10, ncol = 4)
# Make feature 1 roughly twice feature 2 (plus small Gaussian noise).
data[, 1] <- 2 * data[, 2] + rnorm(10, mean = 0, sd = 0.01)
# Per-sample totals; the length-10 vector recycles down the columns, so
# element [i, j] is divided by the total of row i.
data.rowsum <- rowSums(data)
data.norm <- data / data.rowsum
rowSums(data.norm) # The rows sum to 1, so the data are normalized
test.input <- data.norm
dimnames(test.input) <- list(
  paste("Sample", seq_len(10)),
  paste("Feature", seq_len(4))
)
# Run ccrepe on the single dataset with 20 iterations and min.subj = 10.
test.output <- ccrepe(
  x = test.input,
  iterations = 20,
  min.subj = 10
)
## ----fig.cap="Non-normalized and normalized associations between feature 1 and feature 2. In this case we would expect feature 1 and feature 2 to be associated. In the output we see this by the positive sim.score value in the [1,2] element of test.output\\$sim.score and the small q-value in the [1,2] element of test.output\\$q.values.", fig.width=7, fig.height=3.5,fig.pos="H"----
# Two panels side by side: raw values first, then the normalized values.
par(mfrow = c(1, 2))
plot(
  data[, 1], data[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Non-normalized"
)
plot(
  data.norm[, 1], data.norm[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Normalized"
)
# Print the ccrepe result for this example.
test.output
## ---------------------------------------------------------------------------
# First dataset: 10 samples x 4 features, feature 1 tied to feature 2.
data <- matrix(rlnorm(40, meanlog = 0, sdlog = 1), nrow = 10, ncol = 4)
data[, 1] <- 2 * data[, 2] + rnorm(10, mean = 0, sd = 0.01)
data.rowsum <- rowSums(data)
data.norm <- data / data.rowsum
rowSums(data.norm) # The rows sum to 1, so the data are normalized
test.input <- data.norm
# Second dataset: 15 samples x 7 features.
data2 <- matrix(rlnorm(105, meanlog = 0, sdlog = 1), nrow = 15, ncol = 7)
# Rows of data2 that correspond to rows 1-10 of data; the datasets don't
# need to have subjects line up exactly.
aligned.rows <- c(1:4, 6:9, 11, 12)
# Tie feature 1 of data2 to feature 3 of data for those shared rows.
data2[aligned.rows, 1] <- 2 * data[, 3] + rnorm(10, mean = 0, sd = 0.01)
data2.rowsum <- rowSums(data2)
data2.norm <- data2 / data2.rowsum
rowSums(data2.norm) # The rows sum to 1, so the data are normalized
test.input.2 <- data2.norm
dimnames(test.input) <- list(
  paste("Sample", seq_len(10)),
  paste("Feature", seq_len(4))
)
# The sample labels of data2 are ordered so that each aligned row carries
# the same name as its counterpart row in the first dataset.
dimnames(test.input.2) <- list(
  paste("Sample", c(1:4, 11, 5:8, 12, 9, 10, 13, 14, 15)),
  paste("Feature", seq_len(7))
)
# Run ccrepe with both datasets supplied (x and y), 20 iterations,
# min.subj = 10.
test.output.two.datasets <- ccrepe(
  x = test.input,
  y = test.input.2,
  iterations = 20,
  min.subj = 10
)
## ----fig.cap="Non-normalized and normalized associations between feature 1 of dataset 2 and feature 3 of dataset 1. In this case we would expect these two features to be associated. In the output we see this by the positive sim.score value in the corresponding element of test.output.two.datasets\\$sim.score and the small q-value in the corresponding element of test.output.two.datasets\\$q.values.", fig.width=7, fig.height=3.5, fig.pos="H"----
# Two panels side by side: feature 1 of dataset 2 against feature 3 of
# dataset 1, on the aligned rows, before and after normalization.
par(mfrow = c(1, 2))
plot(
  data2[aligned.rows, 1], data[, 3],
  xlab = "dataset 2: Feature 1", ylab = "dataset 1: Feature 3",
  main = "Non-normalized"
)
plot(
  data2.norm[aligned.rows, 1], data.norm[, 3],
  xlab = "dataset 2: Feature 1", ylab = "dataset 1: Feature 3",
  main = "Normalized"
)
# Print the two-dataset ccrepe result.
test.output.two.datasets
## ---------------------------------------------------------------------------
# Simulate 10 samples x 4 features of log-normal values.
data <- matrix(rlnorm(40, meanlog = 0, sdlog = 1), nrow = 10, ncol = 4)
# Make feature 1 roughly twice feature 2 (plus small Gaussian noise).
data[, 1] <- 2 * data[, 2] + rnorm(10, mean = 0, sd = 0.01)
# Divide every row by its own total (the vector recycles down columns).
data.rowsum <- rowSums(data)
data.norm <- data / data.rowsum
rowSums(data.norm) # The rows sum to 1, so the data are normalized
test.input <- data.norm
dimnames(test.input) <- list(
  paste("Sample", seq_len(10)),
  paste("Feature", seq_len(4))
)
# Run ccrepe with nc.score passed as the similarity function.
test.output.nc.score <- ccrepe(
  x = test.input,
  sim.score = nc.score,
  iterations = 20,
  min.subj = 10
)
## ----fig.cap="Non-normalized and normalized associations between feature 1 and feature 2. In this case we would expect feature 1 and feature 2 to be associated. In the output we see this by the positive sim.score value in the [1,2] element of test.output.nc.score\\$sim.score and the small q-value in the [1,2] element of test.output.nc.score\\$q.values. In this case, however, the sim.score represents the NC-Score between two features rather than the Spearman correlation.", fig.width=7, fig.height=3.5, fig.pos="H"----
# Two panels side by side: raw values first, then the normalized values.
par(mfrow = c(1, 2))
plot(
  data[, 1], data[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Non-normalized"
)
plot(
  data.norm[, 1], data.norm[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Normalized"
)
# Print the ccrepe result obtained with nc.score as the similarity.
test.output.nc.score
## ---------------------------------------------------------------------------
# Simulate 10 samples x 4 features of log-normal values.
data <- matrix(rlnorm(40, meanlog = 0, sdlog = 1), nrow = 10, ncol = 4)
# Make feature 1 roughly twice feature 2 (plus small Gaussian noise).
data[, 1] <- 2 * data[, 2] + rnorm(10, mean = 0, sd = 0.01)
# Divide every row by its own total (the vector recycles down columns).
data.rowsum <- rowSums(data)
data.norm <- data / data.rowsum
rowSums(data.norm) # The rows sum to 1, so the data are normalized
test.input <- data.norm
dimnames(test.input) <- list(
  paste("Sample", seq_len(10)),
  paste("Feature", seq_len(4))
)
# Toy similarity score used to demonstrate a user-supplied `sim.score`.
#
# Arguments:
#   x        A vector, matrix, or data frame.
#   y        A second vector, or the default NA when only `x` is supplied.
#   constant The value reported as the "similarity" (default 0.5).
#
# Returns `constant` when both `x` and `y` are vectors, or an
# ncol(x)-by-ncol(x) matrix filled with `constant` when `x` is a matrix or
# data frame and `y` is left as NA. Any other combination raises an error.
my.test.sim.score <- function(x, y = NA, constant = 0.5) {
  if (is.vector(x) && is.vector(y)) {
    return(constant)
  }
  # Reduce the NA-sentinel check to a guaranteed length-one condition: a bare
  # is.na(y) is vector-valued for non-scalar y, which `&&` rejects (an error
  # since R 4.3) before the intended stop() below could be reached.
  y.missing <- length(y) == 1L && is.na(y)
  if ((is.matrix(x) || is.data.frame(x)) && y.missing) {
    return(matrix(rep(constant, ncol(x)^2), ncol = ncol(x)))
  }
  stop('ERROR')
}
# Run ccrepe with the custom similarity function; sim.score.args passes
# constant = 0.6 in place of the function's default of 0.5.
test.output.sim.score <- ccrepe(
  x = test.input,
  sim.score = my.test.sim.score,
  iterations = 20,
  min.subj = 10,
  sim.score.args = list(constant = 0.6)
)
## ----fig.cap="Non-normalized and normalized associations between feature 1 and feature 2. In this case we would expect feature 1 and feature 2 to be associated. Note that the values of sim.score are all 0.6 and none of the p-values are very small because of the arbitrary definition of the similarity score.", fig.width=7, fig.height=3.5, fig.pos="H"----
# Two panels side by side: raw values first, then the normalized values.
par(mfrow = c(1, 2))
plot(
  data[, 1], data[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Non-normalized"
)
plot(
  data.norm[, 1], data.norm[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Normalized"
)
# Print the ccrepe result obtained with the custom similarity function.
test.output.sim.score
## ---------------------------------------------------------------------------
# Simulate 10 samples x 4 features of log-normal values (no feature pair is
# tied together in this example).
data <- matrix(rlnorm(40, meanlog = 0, sdlog = 1), nrow = 10, ncol = 4)
# Divide every row by its own total (the vector recycles down columns).
data.rowsum <- rowSums(data)
data.norm <- data / data.rowsum
rowSums(data.norm) # The rows sum to 1, so the data are normalized
test.input <- data.norm
dimnames(test.input) <- list(
  paste("Sample", seq_len(10)),
  paste("Feature", seq_len(4))
)
# Three runs that differ only in the column-subset arguments.
# subset.cols.x = c(1, 3):
test.output.1.3 <- ccrepe(
  x = test.input,
  iterations = 20,
  min.subj = 10,
  subset.cols.x = c(1, 3)
)
# subset.cols.x = c(1) with compare.within.x = FALSE:
test.output.1 <- ccrepe(
  x = test.input,
  iterations = 20,
  min.subj = 10,
  subset.cols.x = c(1),
  compare.within.x = FALSE
)
# subset.cols.x = c(1, 2) and subset.cols.y = c(3) with
# compare.within.x = FALSE:
test.output.12.3 <- ccrepe(
  x = test.input,
  iterations = 20,
  min.subj = 10,
  subset.cols.x = c(1, 2),
  subset.cols.y = c(3),
  compare.within.x = FALSE
)
# Print the similarity-score element of each result for comparison.
test.output.1.3$sim.score
test.output.1$sim.score
test.output.12.3$sim.score
## ----eval=FALSE-------------------------------------------------------------
# nc.score(
# x,
# y = NULL,
# use = "everything",
# nbins = NULL,
# bin.cutoffs=NULL)
## ---------------------------------------------------------------------------
# Simulate 10 samples x 4 features of log-normal values.
data <- matrix(rlnorm(40, meanlog = 0, sdlog = 1), nrow = 10, ncol = 4)
# Make feature 1 roughly twice feature 2 (plus small Gaussian noise).
data[, 1] <- 2 * data[, 2] + rnorm(10, mean = 0, sd = 0.01)
# Compute the row totals only AFTER column 1 has been overwritten; totals
# taken beforehand are stale and leave rows that do not sum to 1.
data.rowsum <- rowSums(data)
# The length-10 vector recycles down the columns, so element [i, j] is
# divided by the total of row i.
data.norm <- data / data.rowsum
rowSums(data.norm) # The rows sum to 1, so the data are normalized
test.input <- data.norm
dimnames(test.input) <- list(
  paste("Sample", seq_len(10)),
  paste("Feature", seq_len(4))
)
# nc.score on the whole matrix, then on the first two features passed as
# separate x and y vectors.
test.output.matrix <- nc.score(x = test.input)
test.output.num <- nc.score(
  x = test.input[, 1],
  y = test.input[, 2]
)
## ----fig.cap="Non-normalized and normalized associations between feature 1 and feature 2 of the second example. Again, we expect to observe a positive association between feature 1 and feature 2. In terms of generalized checkerboard scores, we would expect to see more co-variation patterns than co-exclusion patterns. This is shown by the positive and relatively high value of the [1,2] element of test.output.matrix (which is identical to test.output.num)", fig.height=3, fig.pos="H"----
# Two panels side by side: raw values first, then the normalized values.
par(mfrow = c(1, 2))
plot(
  data[, 1], data[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Non-normalized"
)
plot(
  data.norm[, 1], data.norm[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Normalized"
)
# Print the matrix-input result and the two-vector result.
test.output.matrix
test.output.num
## ---------------------------------------------------------------------------
# Simulate 10 samples x 4 features of log-normal values.
data <- matrix(rlnorm(40, meanlog = 0, sdlog = 1), nrow = 10, ncol = 4)
# Make feature 1 roughly twice feature 2 (plus small Gaussian noise).
data[, 1] <- 2 * data[, 2] + rnorm(10, mean = 0, sd = 0.01)
# Compute the row totals only AFTER column 1 has been overwritten; totals
# taken beforehand are stale and leave rows that do not sum to 1.
data.rowsum <- rowSums(data)
# The length-10 vector recycles down the columns, so element [i, j] is
# divided by the total of row i.
data.norm <- data / data.rowsum
rowSums(data.norm) # The rows sum to 1, so the data are normalized
test.input <- data.norm
dimnames(test.input) <- list(
  paste("Sample", seq_len(10)),
  paste("Feature", seq_len(4))
)
# nc.score on the whole matrix with nbins = 4.
test.output <- nc.score(
  x = test.input,
  nbins = 4
)
## ----fig.cap="Non-normalized and normalized associations between feature 1 and feature 2 of the second example. Again, we expect to observe a positive association between feature 1 and feature 2. In terms of generalized checkerboard scores, we would expect to see more co-variation patterns than co-exclusion patterns. This is shown by the positive and relatively high value in the [1,2] element of test.output. In this case, the smaller bin number yields a smaller NC-score because of the coarser partitioning of the data.", fig.height=3, fig.pos="H"----
# Two panels side by side: raw values first, then the normalized values.
par(mfrow = c(1, 2))
plot(
  data[, 1], data[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Non-normalized"
)
plot(
  data.norm[, 1], data.norm[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Normalized"
)
# Print the nc.score result computed with nbins = 4.
test.output
## ---------------------------------------------------------------------------
# Simulate 10 samples x 4 features of log-normal values.
data <- matrix(rlnorm(40, meanlog = 0, sdlog = 1), nrow = 10, ncol = 4)
# Make feature 1 roughly twice feature 2 (plus small Gaussian noise).
data[, 1] <- 2 * data[, 2] + rnorm(10, mean = 0, sd = 0.01)
# Compute the row totals only AFTER column 1 has been overwritten; totals
# taken beforehand are stale and leave rows that do not sum to 1.
data.rowsum <- rowSums(data)
# The length-10 vector recycles down the columns, so element [i, j] is
# divided by the total of row i.
data.norm <- data / data.rowsum
rowSums(data.norm) # The rows sum to 1, so the data are normalized
test.input <- data.norm
dimnames(test.input) <- list(
  paste("Sample", seq_len(10)),
  paste("Feature", seq_len(4))
)
# nc.score on the whole matrix with explicit bin cutoffs 0.1, 0.2 and 0.3.
test.output <- nc.score(
  x = test.input,
  bin.cutoffs = c(0.1, 0.2, 0.3)
)
## ----fig.cap="Non-normalized and normalized associations between feature 1 and feature 2 of the second example. Again, we expect to observe a positive association between feature 1 and feature 2. In terms of generalized checkerboard scores, we would expect to see more co-variation patterns than co-exclusion patterns. This is shown by the positive and relatively high value in the [1,2] element of test.output. The bin cutoffs specified here (0.1, 0.2, 0.3) split the normalized abundances into four ranges: almost absent (below 0.1), low abundance (0.1 to 0.2), medium abundance (0.2 to 0.3), and high abundance (above 0.3).", fig.height=3, fig.pos="H"----
# Two panels side by side: raw values first, then the normalized values.
par(mfrow = c(1, 2))
plot(
  data[, 1], data[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Non-normalized"
)
plot(
  data.norm[, 1], data.norm[, 2],
  xlab = "Feature 1", ylab = "Feature 2", main = "Normalized"
)
# Print the nc.score result computed with the explicit bin cutoffs.
test.output