## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"-------------------- BiocStyle::latex() ## ----prepareData,echo=T,cache=F-------------------------------------------- library(RnaSeqSampleSize) ## ----singlePower,echo=TRUE,tidy=TRUE,cache=T------------------------------- example(est_power) ## ----singleSampleSize,each=TRUE,tidy=TRUE,cache=T-------------------------- example(sample_size) ## ----showData,echo=F,cache=F----------------------------------------------- data(package="RnaSeqSampleSizeData")$results[,"Item"] ## ----distributionPower1,echo=TRUE,tidy=FALSE,cache=TRUE-------------------- est_power_distribution(n=65,f=0.01,rho=2, distributionObject="TCGA_READ",repNumber=5) ## ----distributionPower2,echo=TRUE,tidy=FALSE,cache=TRUE-------------------- #Power estimation based on some interested genes. #We use storeProcess=TRUE to return the details for all selected genes. selectedGenes<-c("A1BG","A2BP1","A2M","A4GALT","AAAS") powerDistribution<-est_power_distribution(n=65,f=0.01,rho=2, distributionObject="TCGA_READ", selectedGenes=selectedGenes, storeProcess=TRUE) str(powerDistribution) mean(powerDistribution$power) ## ----distributionPower3,echo=TRUE,tidy=FALSE,cache=T,eval=FALSE------------ # powerDistribution<-est_power_distribution(n=65,f=0.01,rho=2, # distributionObject="TCGA_READ",pathway="00010", # minAveCount=1,storeProcess=TRUE) # mean(powerDistribution$power) ## ----distributionSampleSize,echo=TRUE,tidy=FALSE,cache=T------------------- sample_size_distribution(power=0.8,f=0.01,distributionObject="TCGA_READ", repNumber=5,showMessage=TRUE) ## ----generateUserData,echo=TRUE,tidy=TRUE,cache=T-------------------------- #Generate a 10000*10 RNA-seq data as prior dataset set.seed(123) dataMatrix<-matrix(sample(0:3000,100000,replace=TRUE),nrow=10000,ncol=10) colnames(dataMatrix)<-c(paste0("Control",1:5),paste0("Treatment",1:5)) row.names(dataMatrix)<-paste0("gene",1:10000) head(dataMatrix) ## ----userDataSampleSize,echo=TRUE,tidy=FALSE,cache=TRUE-------------------- #Estitamete the gene read count and dispersion distribution dataMatrixDistribution<-est_count_dispersion(dataMatrix, group=c(rep(0,5),rep(1,5))) #Power estimation by read count and dispersion distribution est_power_distribution(n=65,f=0.01,rho=2, distributionObject=dataMatrixDistribution,repNumber=5) ## ----librarySizeAndGeneRange,echo=TRUE,tidy=FALSE,cache=TRUE,message=FALSE,eval=FALSE---- # library(recount) # studyId="SRP009615" # url <- download_study(studyId) # load(file.path(studyId, 'rse_gene.Rdata')) # # #show percent of mapped reads # plot_mappedReads_percent(rse_gene) # #show propotion of gene counts in different range # plot_gene_counts_range(rse_gene,targetSize = 4e+07) # ## ----AnalyzeDataSet,echo=TRUE,tidy=FALSE,cache=TRUE,message=FALSE,eval=FALSE---- # # library(ExpressionAtlas) # # # projectId="E-ENAD-46" # allExps <- getAtlasData(projectId) # ExpressionAtlasObj <- allExps[[ projectId ]]$rnaseq # # #only keeping "g2" (COVID-19) and "g4" (normal) samples # expObj=ExpressionAtlasObj[,which(colData(ExpressionAtlasObj)$AtlasAssayGroup %in% c("g2","g4"))] # expObjGroups= 2-as.integer(as.factor(colData(expObj)$AtlasAssayGroup)) #0 for normal and 1 for COVID-19 samples # #only keeping genes with at least 10 counts # minAveCount=10 # averageCountsGene=rowSums(assay(expObj))/ncol(expObj) # expObjFilter=expObj[which(averageCountsGene>=minAveCount),] # # result=analyze_dataset(expObjFilter,expObjGroups=expObjGroups) # ## ----singlePowerCurves,echo=TRUE,tidy=TRUE,cache=T------------------------- result1<-est_power_curve(n=63, f=0.01, rho=2, lambda0=5, phi0=0.5) result2<-est_power_curve(n=63, f=0.05, rho=2, lambda0=5, phi0=0.5) plot_power_curve(list(result1,result2)) ## ----optimazation,echo=TRUE,tidy=FALSE,cache=T----------------------------- result<-optimize_parameter(fun=est_power,opt1="n", opt2="lambda0",opt1Value=c(3,5,10,15,20), opt2Value=c(1:5,10,20))