## ----echo = 4:5, message = FALSE, warning = FALSE-----------------------------
# This installs packages from BioConductor
# if (!requireNamespace("BiocManager", quietly=TRUE))
# install.packages("BiocManager")
# BiocManager::install("powerTCR")
library(powerTCR)
data("repertoires")
## -----------------------------------------------------------------------------
str(repertoires)
## ----warning = FALSE----------------------------------------------------------
# This will loop through our list of sample repertoires,
# and store a fit in each
fits <- list()
for(i in seq_along(repertoires)){
# Choose a sequence of possible u for your model fit
# Ideally, you want to search a lot of thresholds, but for quick
# computation, we are only going to test 4
thresholds <- unique(round(quantile(repertoires[[i]], c(.75,.8,.85,.9))))
fits[[i]] <- fdiscgammagpd(repertoires[[i]], useq = thresholds,
shift = min(repertoires[[i]]))
}
names(fits) <- names(repertoires)
## -----------------------------------------------------------------------------
# You could also look at the first sample by typing fits[[1]]
fits$samp1
## -----------------------------------------------------------------------------
# Grab mles of fits:
get_mle(fits)
# Grab negative log likelihoods of fits
get_nllh(fits)
## -----------------------------------------------------------------------------
desponds_fits <- list()
for(i in seq_along(repertoires)){
desponds_fits[[i]] <- fdesponds(repertoires[[i]])
}
names(desponds_fits) <- names(repertoires)
desponds_fits$samp1
## -----------------------------------------------------------------------------
# The number of clones in each sample
n1 <- length(repertoires[[1]])
n2 <- length(repertoires[[2]])
# Grids of quantiles to check
# (you want the same number of points as were observed in the sample)
q1 <- seq(n1/(n1+1), 1/(n1+1), length.out = n1)
q2 <- seq(n2/(n2+1), 1/(n2+1), length.out = n2)
# Compute the value of fitted distributions at grid of quantiles
theor1 <- qdiscgammagpd(q1, fits[[1]])
theor2 <- qdiscgammagpd(q2, fits[[2]])
## ----fig.wide=TRUE, echo=2:7--------------------------------------------------
par(mfrow = c(1,2))
plot(log(repertoires[[1]]), log(seq_len(n1)), pch = 16, cex = 2,
xlab = "log clone size", ylab = "log rank", main = "samp1")
points(log(theor1), log(seq_len(n1)), pch = 'x', col = "darkcyan")
plot(log(repertoires[[2]]), log(seq_len(n2)), pch = 16, cex = 2,
xlab = "log clone size", ylab = "log rank", main = "samp2")
points(log(theor2), log(seq_len(n2)), pch = 'x', col = "chocolate")
## -----------------------------------------------------------------------------
# Simulate 3 sampled repertoires
set.seed(123)
s1 <- rdiscgammagpd(1000, shape = 3, rate = .15, u = 25, sigma = 15,
xi = .5, shift = 1)
s2 <- rdiscgammagpd(1000, shape = 3.1, rate = .14, u = 26, sigma = 15,
xi = .6, shift = 1)
s3 <- rdiscgammagpd(1000, shape = 10, rate = .3, u = 45, sigma = 20,
xi = .7, shift = 1)
## -----------------------------------------------------------------------------
bad <- rdiscgammagpd(1000, shape = 1, rate = 2, u = 25, sigma = 10,
xi = .5, shift = 1, phi = .2)
plot(log(sort(bad, decreasing = TRUE)), log(seq_len(1000)), pch = 16,
xlab = "log clone size", ylab = "log rank", main = "bad simulation")
## -----------------------------------------------------------------------------
# Fit model to the data at the true thresholds
sim_fits <- list("s1" = fdiscgammagpd(s1, useq = 25),
"s2" = fdiscgammagpd(s2, useq = 26),
"s3" = fdiscgammagpd(s3, useq = 45))
# Compute the pairwise JS distance between 3 fitted models
grid <- min(c(s1,s2,s3)):10000
distances <- get_distances(sim_fits, grid, modelType="Spliced")
## -----------------------------------------------------------------------------
distances
## ----fig.small=TRUE-----------------------------------------------------------
clusterPlot(distances, method = "ward.D")
## ----echo=2:3, message = FALSE, warning = FALSE-------------------------------
library(vegan)
get_diversity(sim_fits)
## -----------------------------------------------------------------------------
boot <- get_bootstraps(fits, resamples = 5, cores = 1, gridStyle = "copy")
## ----echo = 3:5, message = FALSE, warning = FALSE-----------------------------
library(magrittr)
library(purrr)
mles <- get_mle(boot[[1]])
xi_CI <- map(mles, 'xi') %>%
unlist %>%
quantile(c(.025,.5,.975))
xi_CI