## ----setup--------------------------------------------------------------------
library(MungeSumstats)

## ---- eval=FALSE, message=TRUE------------------------------------------------
#  eduAttainOkbayPth <- system.file("extdata","eduAttainOkbay.txt",
#                                    package="MungeSumstats")
#  reformatted <-
#    MungeSumstats::format_sumstats(path=eduAttainOkbayPth,
#                                   ref_genome="GRCh37")

## ----echo=FALSE---------------------------------------------------------------
# Keep the vignette build fast: the time-intensive checks are switched off.
eduAttainOkbayPth <- system.file(
  "extdata", "eduAttainOkbay.txt",
  package = "MungeSumstats"
)
reformatted <- MungeSumstats::format_sumstats(
  path = eduAttainOkbayPth,
  ref_genome = "GRCh37",
  on_ref_genome = FALSE,
  strand_ambig_filter = FALSE,
  bi_allelic_filter = FALSE,
  allele_flip_check = FALSE
)

## ---- message=TRUE------------------------------------------------------------
# Look up the path of the example ALS GWAS VCF (IEU Open GWAS project) that
# is bundled in the package's extdata folder; nothing is copied or downloaded.
ALSvcfPth <- system.file(
  "extdata", "ALSvcf.vcf",
  package = "MungeSumstats"
)

## ----eval=FALSE---------------------------------------------------------------
#  reformatted_vcf <-
#    MungeSumstats::format_sumstats(path=ALSvcfPth,
#                                   ref_genome="GRCh37")

## ---- eval=FALSE, message=FALSE-----------------------------------------------
#  #set
#  reformatted_vcf_2 <-
#    MungeSumstats::format_sumstats(path=ALSvcfPth,
#                                   ref_genome="GRCh37",
#                                   log_folder_ind=TRUE,
#                                   imputation_ind=TRUE,
#                                   log_mungesumstats_msgs=TRUE)

## ----echo=FALSE,message=FALSE-------------------------------------------------
# Same call as the (unevaluated) chunk above, but with the time-intensive
# checks switched off so the vignette builds quickly.
reformatted_vcf_2 <- MungeSumstats::format_sumstats(
  path = ALSvcfPth,
  ref_genome = "GRCh37",
  # logging / imputation-flag options demonstrated in this section
  log_folder_ind = TRUE,
  imputation_ind = TRUE,
  log_mungesumstats_msgs = TRUE,
  # checks disabled for speed
  on_ref_genome = FALSE,
  strand_ambig_filter = FALSE,
  bi_allelic_filter = FALSE,
  allele_flip_check = FALSE
)

## ---- message=TRUE------------------------------------------------------------
# Show the names of the elements in the object returned by format_sumstats().
names(reformatted_vcf_2)

## ---- message=TRUE------------------------------------------------------------
# Print the `info_filter` entry stored under `log_files` in the result.
print(reformatted_vcf_2$log_files$info_filter)

## ---- message=FALSE,eval=FALSE------------------------------------------------
#  #set
#  reformatted_vcf_2 <-
#    MungeSumstats::format_sumstats(path=ALSvcfPth,
#                                   ref_genome="GRCh37",
#                                   log_folder_ind=TRUE,
#                                   imputation_ind=TRUE,
#                                   log_mungesumstats_msgs=TRUE,
#                                   return_format="GRanges")

## ---- message=FALSE,eval=FALSE------------------------------------------------
#  #set
#  reformatted_vcf_2 <-
#    MungeSumstats::format_sumstats(path=ALSvcfPth,
#                                   ref_genome="GRCh37",
#                                   write_vcf=TRUE,
#                                   save_format ="openGWAS")

## ---- message=FALSE,eval=FALSE------------------------------------------------
#  # Path to the Educational Attainment (Okbay) sumstats file bundled with the package
#  eduAttainOkbayPth <- system.file("extdata", "eduAttainOkbay.txt",
#                                    package = "MungeSumstats")
#  sumstats_list <- list(ss1 = eduAttainOkbayPth, ss2 = eduAttainOkbayPth)
#  
#  ref_genomes <- MungeSumstats::get_genome_builds(sumstats_list = sumstats_list)
#  

## -----------------------------------------------------------------------------
# Load the package's pre-formatted example summary statistics.
sumstats_dt <- MungeSumstats::formatted_example()
# Run liftover() on them, from the hg19 build to hg38.
sumstats_dt_hg38 <- MungeSumstats::liftover(
  sumstats_dt = sumstats_dt,
  convert_ref_genome = "hg38",
  ref_genome = "hg19"
)
# Render the first rows of the converted table.
knitr::kable(head(sumstats_dt_hg38))

## -----------------------------------------------------------------------------
# Path to the example Educational Attainment (Okbay) sumstats file in extdata.
eduAttainOkbayPth <- system.file("extdata", "eduAttainOkbay.txt",
                                  package = "MungeSumstats")
# Temporary output path. `fileext` should hold only the extension; the
# descriptive stem goes in `pattern` so the random part of the temp name
# does not run straight into it. The path still ends in ".tsv.gz".
formatted_path <- tempfile(pattern = "eduAttainOkbay_standardised_",
                           fileext = ".tsv.gz")


#### 1. Read in the data and standardise header names ####
dat <- MungeSumstats::read_sumstats(path = eduAttainOkbayPth,
                                    standardise_headers = TRUE)
knitr::kable(head(dat))
#### 2. Write to disk as a compressed, tab-delimited, tabix-indexed file ####
# `return_path = TRUE` is requested and the result is assigned back to
# `formatted_path`, replacing the temp path chosen above.
formatted_path <- MungeSumstats::write_sumstats(sumstats_dt = dat,
                                                save_path = formatted_path,
                                                tabix_index = TRUE,
                                                write_vcf = FALSE,
                                                return_path = TRUE)

## -----------------------------------------------------------------------------
#### Deliberately give two columns non-standard names ####
# Work on a copy so `dat` itself keeps its standardised headers.
dat_raw <- data.table::copy(dat)
data.table::setnames(
  x = dat_raw,
  old = c("SNP", "CHR"),
  new = c("rsID", "Seqnames")
)
#### Append a non-standard column whose casing should be preserved ####
dat_raw$Support <- stats::runif(n = nrow(dat_raw))

# Re-standardise the headers; `uppercase_unmapped = FALSE` is the setting
# used here for the extra "Support" column.
dat2 <- MungeSumstats::standardise_header(
  sumstats_dt = dat_raw,
  return_list = FALSE,
  uppercase_unmapped = FALSE
)
knitr::kable(head(dat2))

## ---- message=TRUE, echo=FALSE------------------------------------------------
# Report the R version, platform and attached/loaded package versions used
# for this run.
utils::sessionInfo()