## ----style, echo = FALSE, results = 'asis'-------------------------------------------------------- BiocStyle::markdown() options(width=100, max.print=1000) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))) ## ----setup, echo=FALSE, messages=FALSE, warnings=FALSE-------------------------------------------- suppressPackageStartupMessages({ library(systemPipeR) library(systemPipeRdata) library(BiocParallel) library(Biostrings) library(Rsamtools) library(GenomicRanges) library(ggplot2) library(GenomicAlignments) library(ShortRead) }) ## ----install, eval=FALSE-------------------------------------------------------------------------- # source("http://bioconductor.org/biocLite.R") # Sources the biocLite.R installation script # biocLite("systemPipeR") # Installs systemPipeR from Bioconductor # biocLite("tgirke/systemPipeRdata", build_vignettes=TRUE, dependencies=TRUE) # From github ## ----documentation, eval=FALSE-------------------------------------------------------------------- # library("systemPipeR") # Loads the package # library(help="systemPipeR") # Lists package info # vignette("systemPipeR") # Opens vignette ## ----targetsSE, eval=TRUE------------------------------------------------------------------------- library(systemPipeR) targetspath <- system.file("extdata", "targets.txt", package="systemPipeR") read.delim(targetspath, comment.char = "#") ## ----targetsPE, eval=TRUE------------------------------------------------------------------------- targetspath <- system.file("extdata", "targetsPE.txt", package="systemPipeR") read.delim(targetspath, comment.char = "#")[1:2,1:6] ## ----targetscomp, eval=TRUE----------------------------------------------------------------------- readComp(file=targetspath, format="vector", delim="-") ## ----param_structure, eval=TRUE------------------------------------------------------------------- parampath <- system.file("extdata", "tophat.param", package="systemPipeR") read.delim(parampath, comment.char = "#") ## ----param_import, eval=TRUE---------------------------------------------------------------------- args <- suppressWarnings(systemArgs(sysma=parampath, mytargets=targetspath)) args ## ----sysarg_access, eval=TRUE--------------------------------------------------------------------- names(args) modules(args) cores(args) outpaths(args)[1] sysargs(args)[1] ## ----load_package, eval=FALSE--------------------------------------------------------------------- # library(systemPipeR) ## ----construct_sysargs, eval=FALSE---------------------------------------------------------------- # args <- systemArgs(sysma="trim.param", mytargets="targets.txt") ## ----preprocessing, eval=FALSE-------------------------------------------------------------------- # preprocessReads(args=args, Fct="trimLRPatterns(Rpattern='GCCCGGGTAA', subject=fq)", # batchsize=100000, overwrite=TRUE, compress=TRUE) # writeTargetsout(x=args, file="targets_trim.txt") ## ----custom_preprocessing, eval=FALSE------------------------------------------------------------- # args <- systemArgs(sysma="trimPE.param", mytargets="targetsPE.txt") # filterFct <- function(fq, cutoff=20, Nexceptions=0) { # qcount <- rowSums(as(quality(fq), "matrix") <= cutoff) # fq[qcount <= Nexceptions] # Retains reads where Phred scores are >= cutoff with N exceptions # } # preprocessReads(args=args, Fct="filterFct(fq, cutoff=20, Nexceptions=0)", batchsize=100000) # writeTargetsout(x=args, file="targets_PEtrim.txt") ## ----fastq_quality, eval=FALSE-------------------------------------------------------------------- # fqlist <- seeFastq(fastq=infile1(args), batchsize=10000, klength=8) # pdf("./results/fastqReport.pdf", height=18, width=4*length(fqlist)) # seeFastqPlot(fqlist) # dev.off() ## ----fastq_quality_parallel_single, eval=FALSE---------------------------------------------------- # args <- systemArgs(sysma="tophat.param", mytargets="targets.txt") # f <- function(x) seeFastq(fastq=infile1(args)[x], batchsize=100000, klength=8) # fqlist <- bplapply(seq(along=args), f, BPPARAM = MulticoreParam(workers=8)) # seeFastqPlot(unlist(fqlist, recursive=FALSE)) ## ----fastq_quality_parallel_cluster, eval=FALSE--------------------------------------------------- # library(BiocParallel); library(BatchJobs) # f <- function(x) { # library(systemPipeR) # args <- systemArgs(sysma="tophat.param", mytargets="targets.txt") # seeFastq(fastq=infile1(args)[x], batchsize=100000, klength=8) # } # funs <- makeClusterFunctionsTorque("torque.tmpl") # param <- BatchJobsParam(length(args), resources=list(walltime="20:00:00", nodes="1:ppn=1", memory="6gb"), cluster.functions=funs) # register(param) # fqlist <- bplapply(seq(along=args), f) # seeFastqPlot(unlist(fqlist, recursive=FALSE)) ## ----bowtie_index, eval=FALSE--------------------------------------------------------------------- # args <- systemArgs(sysma="tophat.param", mytargets="targets.txt") # moduleload(modules(args)) # Skip if module system is not available # system("bowtie2-build ./data/tair10.fasta ./data/tair10.fasta") ## ----run_bowtie_single, eval=FALSE---------------------------------------------------------------- # bampaths <- runCommandline(args=args) ## ----run_bowtie_parallel, eval=FALSE-------------------------------------------------------------- # file.copy(system.file("extdata", ".BatchJobs.R", package="systemPipeR"), ".") # file.copy(system.file("extdata", "torque.tmpl", package="systemPipeR"), ".") # resources <- list(walltime="20:00:00", nodes=paste0("1:ppn=", cores(args)), memory="10gb") # reg <- clusterRun(args, conffile=".BatchJobs.R", template="torque.tmpl", Njobs=18, runid="01", # resourceList=resources) # waitForJobs(reg) ## ----process_monitoring, eval=FALSE--------------------------------------------------------------- # showStatus(reg) # file.exists(outpaths(args)) # sapply(1:length(args), function(x) loadResult(reg, x)) # Works after job completion ## ----align_stats1, eval=FALSE--------------------------------------------------------------------- # read_statsDF <- alignStats(args) # write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t") ## ----align_stats2, eval=TRUE---------------------------------------------------------------------- read.table(system.file("extdata", "alignStats.xls", package="systemPipeR"), header=TRUE)[1:4,] ## ----align_stats_parallel, eval=FALSE------------------------------------------------------------- # f <- function(x) alignStats(args[x]) # read_statsList <- bplapply(seq(along=args), f, BPPARAM = MulticoreParam(workers=8)) # read_statsDF <- do.call("rbind", read_statsList) ## ----align_stats_parallel_cluster, eval=FALSE----------------------------------------------------- # library(BiocParallel); library(BatchJobs) # f <- function(x) { # library(systemPipeR) # args <- systemArgs(sysma="tophat.param", mytargets="targets.txt") # alignStats(args[x]) # } # funs <- makeClusterFunctionsTorque("torque.tmpl") # param <- BatchJobsParam(length(args), resources=list(walltime="20:00:00", nodes="1:ppn=1", memory="6gb"), cluster.functions=funs) # register(param) # read_statsList <- bplapply(seq(along=args), f) # read_statsDF <- do.call("rbind", read_statsList) ## ----igv, eval=FALSE------------------------------------------------------------------------------ # symLink2bam(sysargs=args, htmldir=c("~/.html/", "somedir/"), # urlbase="http://myserver.edu/~username/", # urlfile="IGVurl.txt") ## ----bowtie2, eval=FALSE-------------------------------------------------------------------------- # args <- systemArgs(sysma="bowtieSE.param", mytargets="targets.txt") # moduleload(modules(args)) # Skip if module system is not available # bampaths <- runCommandline(args=args) ## ----bowtie2_cluster, eval=FALSE------------------------------------------------------------------ # qsubargs <- getQsubargs(queue="batch", cores=cores(args), memory="mem=10gb", time="walltime=20:00:00") # (joblist <- qsubRun(args=args, qsubargs=qsubargs, Nqsubs=18, package="systemPipeR")) ## ----bwamem_cluster, eval=FALSE------------------------------------------------------------------- # args <- systemArgs(sysma="bwa.param", mytargets="targets.txt") # moduleload(modules(args)) # Skip if module system is not available # system("bwa index -a bwtsw ./data/tair10.fasta") # Indexes reference genome # bampaths <- runCommandline(args=args) ## ----rsubread, eval=FALSE------------------------------------------------------------------------- # library(Rsubread) # args <- systemArgs(sysma="rsubread.param", mytargets="targets.txt") # buildindex(basename=reference(args), reference=reference(args)) # Build indexed reference genome # align(index=reference(args), readfile1=infile1(args), input_format="FASTQ", # output_file=outfile1(args), output_format="SAM", nthreads=8, indels=1, TH1=2) # for(i in seq(along=outfile1(args))) asBam(file=outfile1(args)[i], destination=gsub(".sam", "", outfile1(args)[i]), overwrite=TRUE, indexDestination=TRUE) ## ----gsnap, eval=FALSE---------------------------------------------------------------------------- # library(gmapR); library(BiocParallel); library(BatchJobs) # gmapGenome <- GmapGenome(reference(args), directory="data", name="gmap_tair10chr/", create=TRUE) # args <- systemArgs(sysma="gsnap.param", mytargets="targetsPE.txt") # f <- function(x) { # library(gmapR); library(systemPipeR) # args <- systemArgs(sysma="gsnap.param", mytargets="targetsPE.txt") # gmapGenome <- GmapGenome(reference(args), directory="data", name="gmap_tair10chr/", create=FALSE) # p <- GsnapParam(genome=gmapGenome, unique_only=TRUE, molecule="DNA", max_mismatches=3) # o <- gsnap(input_a=infile1(args)[x], input_b=infile2(args)[x], params=p, output=outfile1(args)[x]) # } # funs <- makeClusterFunctionsTorque("torque.tmpl") # param <- BatchJobsParam(length(args), resources=list(walltime="20:00:00", nodes="1:ppn=1", memory="6gb"), cluster.functions=funs) # register(param) # d <- bplapply(seq(along=args), f) ## ----genVar_workflow_single, eval=FALSE----------------------------------------------------------- # genWorkenvir(workflow="varseq") # setwd("varseq") ## ----sessionInfo---------------------------------------------------------------------------------- sessionInfo()