## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----eval=FALSE--------------------------------------------------------------- # bulk_long_pipeline(..., do_genome_align=FALSE, do_read_realign=FALSE) # # OR # sc_long_pipeline(..., do_genome_align=FALSE, do_read_realign=FALSE) ## ----eval=TRUE, echo=TRUE----------------------------------------------------- # download required files using BiocFileCache temp_path <- tempdir() bfc <- BiocFileCache::BiocFileCache(temp_path, ask=FALSE) file_url <- "https://raw.githubusercontent.com/OliverVoogd/FLAMESData/master/data" annot <- bfc[[names(BiocFileCache::bfcadd(bfc, "Annotation", paste(file_url, "SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf", sep="/")))]] # [[ notation is used to get the local file path of the downloaded file genome_fa <- bfc[[names(BiocFileCache::bfcadd(bfc, "Genomefa", paste(file_url, "SIRV_isoforms_multi-fasta_170612a.fasta", sep="/")))]] # download the two fastq files, move them to a folder to be merged together fastq1 <- bfc[[names(BiocFileCache::bfcadd(bfc, "Fastq1", paste(file_url, "fastq/sample1.fastq.gz", sep="/")))]] fastq2 <- bfc[[names(BiocFileCache::bfcadd(bfc, "Fastq2", paste(file_url, "fastq/sample2.fastq.gz", sep="/")))]] fastq_dir <- paste(temp_path, "fastq_dir", sep="/") # the downloaded fastq files need to be in a directory to be merged together dir.create(fastq_dir) file.copy(c(fastq1, fastq2), fastq_dir) unlink(c(fastq1, fastq2)) # the original files can be deleted # setup other environment variables config_file <- system.file("extdata/SIRV_config_default.json", package="FLAMES") # the configuration file is included with the FLAMES package outdir <- tempdir() # create a temporary output directory if (!dir.exists(outdir)) { dir.create(outdir) } ## ----eval=TRUE, echo=FALSE, results='hide'------------------------------------ # download files generated using minimap2 (these files need to be generated by the user on a system with access to Minimap2. More information is in the section [Genome Alignment](#genome-alignment-using-minimap2)) # files are renamed so that FLAMES can find related .bam.bai files using the same file name. BiocFileCache automatically gives the files a random prefix which affects this. genome_bam <- paste0(temp_path, "/align2genome.bam") file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Align BAM", paste(file_url, "align2genome.bam", sep="/")))]], genome_bam) genome_index <- paste0(temp_path, "/align2genome.bam.bai") file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Align BAM Index", paste0(file_url, "/align2genome.bam.bai")))]], genome_index) realign_bam <- paste0(temp_path, "/realign2transcript.bam") file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Realign BAM", paste(file_url, "realign2transcript.bam", sep="/")))]], realign_bam) realign_index <- paste0(temp_path, "/realign2transcript.bam.bai") file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Realign BAM Index", paste(file_url, "realign2transcript.bam.bai", sep="/")))]], realign_index) ## ----eval=TRUE---------------------------------------------------------------- library(FLAMES) # If running the FLAMES single cell pipeline: # pipeline_variables <- sc_windows_pipeline_setup(annot=annot, fastq=fastq_dir, outdir=outdir, # genome_fa=genome_fa, config_file=config_file) # or, if running the bulk pipeline: pipeline_variables <- bulk_windows_pipeline_setup(annot=annot, fastq=fastq_dir, outdir=outdir, genome_fa=genome_fa, config_file=config_file) ## ---- eval=TRUE--------------------------------------------------------------- gff3_to_bed12(pipeline_variables$annot, pipeline_variables$tmp_bed) ## ---- eval=FALSE-------------------------------------------------------------- # {_prog} -ax splice -t 12 {_others} -k14 --secondary=no {_index} {_fq} -o {_out} ## ---- eval=FALSE-------------------------------------------------------------- # samtools_as_bam(pipeline_variables$tmp_sam, pipeline_variables$tmp_bam) # samtools_sort_index(pipeline_variables$tmp_bam, pipeline_variables$genome_bam) # file.remove(pipeline_variables$tmp_sam) # file.remove(pipeline_variables$tmp_bam) ## ----eval=TRUE, echo=FALSE, results='hide'------------------------------------ pipeline_variables$genome_bam = genome_bam ## ----eval=FALSE--------------------------------------------------------------- # pipeline_variables <- windows_pipeline_isoforms(pipeline_variables) ## ----eval=TRUE, echo=FALSE, results='hide'------------------------------------ assembly <- paste0(temp_path, "/transcript_assembly.fa") file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Transcript Assembly Fasta", paste(file_url, "transcript_assembly.fa", sep="/")))]], assembly) assembly_index <- paste0(temp_path, "/transcript_assembly.fa.fai") file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Transcript Assembly Index", paste0(file_url, "/transcript_assembly.fa.fai")))]], assembly_index) isoforms_annot <- paste0(temp_path, "/isoform_annotated.gff3") file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "Annotated Isoform gff3", paste(file_url, "isoform_annotated.gff3", sep="/")))]], isoforms_annot) tss_tes <- paste0(temp_path, "/tss_tes.bedgraph") file.rename(bfc[[names(BiocFileCache::bfcadd(bfc, "TSS TES enrichment", paste0(file_url, "/tss_tes.bedgraph")))]], tss_tes) pipeline_variables$isoform_gff3 <- isoforms_annot pipeline_variables$transcript_fa <- assembly pipeline_variables$transcript_fa_idx <- assembly_index pipeline_variables$tss_tes_stat <- tss_tes g <- parse_gff_tree(annot) pipeline_variables$transcript_dict <- g$transcript_dict i <- parse_gff_tree(isoforms_annot) pipeline_variables$transcript_dict_i <- i$transcript_dict ## ---- eval=FALSE-------------------------------------------------------------- # {_prog} -ax map-ont -p 0.9 --end-bonus 10 -N 3 -t 12 {_index} {_fq} -o {_out} ## ---- eval=FALSE-------------------------------------------------------------- # samtools_as_bam(pipeline_variables$tmp_sam, pipeline_variables$tmp_bam) # samtools_sort_index(pipeline_variables$tmp_bam, pipeline_variables$realign_bam) # file.remove(pipeline_variables$tmp_sam) # file.remove(pipeline_variables$tmp_bam) ## ----eval=TRUE, echo=FALSE, results='hide'------------------------------------ pipeline_variables$realign_bam = realign_bam ## ----eval=TRUE---------------------------------------------------------------- se <- windows_pipeline_quantification(pipeline_variables) ## ----eval=TRUE---------------------------------------------------------------- se ## ----echo=FALSE--------------------------------------------------------------- utils::sessionInfo()