library(cBioPortalData)
library(AnVIL)
This document serves as a reporting tool for errors that occur when running our utility functions on the cBioPortal datasets.
cBioPortalData()
Typically, the number of errors encountered via the API is low. There are only a handful of datasets that error when we apply the utility functions to provide a MultiAssayExperiment data representation.
First, we load the error Rda dataset.
api_errs <- system.file(
"extdata", "api", "err_api_info.rda",
package = "cBioPortalData", mustWork = TRUE
)
load(api_errs)
We can now inspect the contents of the data:
class(err_api_info)
## [1] "list"
length(err_api_info)
## [1] 11
lengths(err_api_info)
## 'sampleMap' does not have required columns
## 2
## Bad Request (HTTP 400).
## 4
## Bad Gateway (HTTP 502).
## 11
## Frequency of NA values higher than the cutoff tolerance
## 2
## `n` must be a single number, not an integer `NA`.
## 2
## Barcodes must start with 'TCGA'
## 3
## Stream error in the HTTP/2 framing layer [www.cbioportal.org]: HTTP/2 stream 0 was not closed cleanly: INTERNAL_ERROR (err 2)
## 3
## Internal Server Error (HTTP 500).
## 1
## Inconsistent build numbers found
## 46
## when the length of the supplied 'genome' vector is not 1, then it must be equal to the\n number of sequences
## 1
## Only two build types at a time can be used
## 1
There were 11 unique errors during the last build run.
names(err_api_info)
## [1] "'sampleMap' does not have required columns"
## [2] "Bad Request (HTTP 400)."
## [3] "Bad Gateway (HTTP 502)."
## [4] "Frequency of NA values higher than the cutoff tolerance"
## [5] "`n` must be a single number, not an integer `NA`."
## [6] "Barcodes must start with 'TCGA'"
## [7] "Stream error in the HTTP/2 framing layer [www.cbioportal.org]: HTTP/2 stream 0 was not closed cleanly: INTERNAL_ERROR (err 2)"
## [8] "Internal Server Error (HTTP 500)."
## [9] "Inconsistent build numbers found"
## [10] "when the length of the supplied 'genome' vector is not 1, then it must be equal to the\n number of sequences"
## [11] "Only two build types at a time can be used"
The most common error was "Inconsistent build numbers found". This is
due to annotations from different genome builds that could not be
resolved.
To see which datasets (identified by their cancer_study_id values) have that error, we can use:
err_api_info[['Inconsistent build numbers found']]
## [1] "thyroid_gatci_2024" "acc_tcga_gdc" "blca_tcga_gdc"
## [4] "brca_tcga_gdc" "cesc_tcga_gdc" "dlbclnos_tcga_gdc"
## [7] "esca_tcga_gdc" "gbm_tcga_gdc" "hnsc_tcga_gdc"
## [10] "ccrcc_tcga_gdc" "prcc_tcga_gdc" "difg_tcga_gdc"
## [13] "hcc_tcga_gdc" "luad_tcga_gdc" "lusc_tcga_gdc"
## [16] "plmeso_tcga_gdc" "hgsoc_tcga_gdc" "paad_tcga_gdc"
## [19] "mnet_tcga_gdc" "prad_tcga_gdc" "read_tcga_gdc"
## [22] "skcm_tcga_gdc" "stad_tcga_gdc" "nsgct_tcga_gdc"
## [25] "thpa_tcga_gdc" "thym_tcga_gdc" "ucec_tcga_gdc"
## [28] "ucs_tcga_gdc" "um_tcga_gdc" "coad_tcga_gdc"
## [31] "soft_tissue_tcga_gdc" "brain_cptac_gdc" "breast_cptac_gdc"
## [34] "coad_cptac_gdc" "luad_cptac_gdc" "lusc_cptac_gdc"
## [37] "ohnca_cptac_gdc" "ovary_cptac_gdc" "pancreas_cptac_gdc"
## [40] "uec_cptac_gdc" "asclc_msk_2024" "cscc_ranson_2022"
## [43] "ucec_msk_2024" "pancan_mappyacts_2022" "nst_nfosi_ntap"
## [46] "brca_aurora_2023"
We can also have a look at the entirety of the dataset.
err_api_info
## $`'sampleMap' does not have required columns`
## [1] "acyc_fmi_2014" "braf_msk_archer_2024"
##
## $`Bad Request (HTTP 400).`
## [1] "glioma_msk_2018" "mbn_sfu_2023" "pcnsl_msk_2024"
## [4] "pancan_pcawg_2020"
##
## $`Bad Gateway (HTTP 502).`
## [1] "heme_msk_impact_2022" "brca_tcga" "gbm_tcga"
## [4] "difg_glass_2019" "kirc_tcga" "lihc_tcga"
## [7] "lusc_tcga" "mng_utoronto_2021" "paad_tcga"
## [10] "pcpg_tcga" "sarc_tcga"
##
## $`Frequency of NA values higher than the cutoff tolerance`
## [1] "mixed_selpercatinib_2020" "ucec_ccr_msk_2022"
##
## $``n` must be a single number, not an integer `NA`.`
## [1] "msk_met_2021" "msk_chord_2024"
##
## $`Barcodes must start with 'TCGA'`
## [1] "nsclc_tcga_broad_2016" "blca_msk_tcga_2020" "mixed_msk_tcga_2021"
##
## $`Stream error in the HTTP/2 framing layer [www.cbioportal.org]: HTTP/2 stream 0 was not closed cleanly: INTERNAL_ERROR (err 2)`
## [1] "pan_origimed_2020" "coadread_tcga_pub" "meso_tcga"
##
## $`Internal Server Error (HTTP 500).`
## [1] "makeanimpact_ccr_2023"
##
## $`Inconsistent build numbers found`
## [1] "thyroid_gatci_2024" "acc_tcga_gdc" "blca_tcga_gdc"
## [4] "brca_tcga_gdc" "cesc_tcga_gdc" "dlbclnos_tcga_gdc"
## [7] "esca_tcga_gdc" "gbm_tcga_gdc" "hnsc_tcga_gdc"
## [10] "ccrcc_tcga_gdc" "prcc_tcga_gdc" "difg_tcga_gdc"
## [13] "hcc_tcga_gdc" "luad_tcga_gdc" "lusc_tcga_gdc"
## [16] "plmeso_tcga_gdc" "hgsoc_tcga_gdc" "paad_tcga_gdc"
## [19] "mnet_tcga_gdc" "prad_tcga_gdc" "read_tcga_gdc"
## [22] "skcm_tcga_gdc" "stad_tcga_gdc" "nsgct_tcga_gdc"
## [25] "thpa_tcga_gdc" "thym_tcga_gdc" "ucec_tcga_gdc"
## [28] "ucs_tcga_gdc" "um_tcga_gdc" "coad_tcga_gdc"
## [31] "soft_tissue_tcga_gdc" "brain_cptac_gdc" "breast_cptac_gdc"
## [34] "coad_cptac_gdc" "luad_cptac_gdc" "lusc_cptac_gdc"
## [37] "ohnca_cptac_gdc" "ovary_cptac_gdc" "pancreas_cptac_gdc"
## [40] "uec_cptac_gdc" "asclc_msk_2024" "cscc_ranson_2022"
## [43] "ucec_msk_2024" "pancan_mappyacts_2022" "nst_nfosi_ntap"
## [46] "brca_aurora_2023"
##
## $`when the length of the supplied 'genome' vector is not 1, then it must be equal to the\n number of sequences`
## [1] "thca_tcga_pan_can_atlas_2018"
##
## $`Only two build types at a time can be used`
## [1] "pancan_mimsi_msk_2024"
cBioDataPack()
Now let’s look at the errors in the packaged datasets that are used for
cBioDataPack:
pack_errs <- system.file(
"extdata", "pack", "err_pack_info.rda",
package = "cBioPortalData", mustWork = TRUE
)
load(pack_errs)
We can do the same for this data:
length(err_pack_info)
## [1] 5
lengths(err_pack_info)
## more columns than column names
## 12
## Frequency of NA values higher than the cutoff tolerance
## 5
## invalid class "ExperimentList" object: \n Non-unique names provided
## 2
## non-character argument
## 2
## 'wget' call had nonzero exit status
## 13
We can get a list of all the errors present:
names(err_pack_info)
## [1] "more columns than column names"
## [2] "Frequency of NA values higher than the cutoff tolerance"
## [3] "invalid class \"ExperimentList\" object: \n Non-unique names provided"
## [4] "non-character argument"
## [5] "'wget' call had nonzero exit status"
And finally the full list of errors:
err_pack_info
## $`more columns than column names`
## [1] "ccrcc_utokyo_2013" "gbm_cptac_2021"
## [3] "luad_mskimpact_2021" "mbl_dkfz_2017"
## [5] "pan_origimed_2020" "sarcoma_msk_2022"
## [7] "bowel_colitis_msk_2022" "prad_msk_mdanderson_2023"
## [9] "brca_tcga_pan_can_atlas_2018" "coadread_tcga_pan_can_atlas_2018"
## [11] "ov_tcga_pan_can_atlas_2018" "sarc_tcga_pan_can_atlas_2018"
##
## $`Frequency of NA values higher than the cutoff tolerance`
## [1] "ihch_mskcc_2020" "mixed_selpercatinib_2020"
## [3] "ucec_ccr_msk_2022" "mixed_msk_tcga_2021"
## [5] "ihch_msk_2021"
##
## $`invalid class "ExperimentList" object: \n Non-unique names provided`
## [1] "mpnst_mskcc" "stad_tcga_pub"
##
## $`non-character argument`
## [1] "pcpg_tcga_pub" "mbn_mdacc_2013"
##
## $`'wget' call had nonzero exit status`
## [1] "braf_msk_impact_2024" "braf_msk_archer_2024" "prostate_msk_2024"
## [4] "pcnsl_msk_2024" "pdac_msk_2024" "ucs_msk_2024"
## [7] "asclc_msk_2024" "lms_msk_2024" "crc_orion_2024"
## [10] "brca_aurora_2023" "hcc_msk_2024" "pancreas_msk_2024"
## [13] "pancan_mimsi_msk_2024"
sessionInfo()
## R version 4.4.3 (2025-02-28)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.2 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.20-bioc/R/lib/libRblas.so
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] survminer_0.5.0 ggpubr_0.6.0
## [3] ggplot2_3.5.1 survival_3.8-3
## [5] cBioPortalData_2.18.2 MultiAssayExperiment_1.32.0
## [7] SummarizedExperiment_1.36.0 Biobase_2.66.0
## [9] GenomicRanges_1.58.0 GenomeInfoDb_1.42.3
## [11] IRanges_2.40.1 S4Vectors_0.44.0
## [13] BiocGenerics_0.52.0 MatrixGenerics_1.18.1
## [15] matrixStats_1.5.0 AnVIL_1.18.5
## [17] AnVILBase_1.0.0 dplyr_1.1.4
## [19] BiocStyle_2.34.0
##
## loaded via a namespace (and not attached):
## [1] jsonlite_1.9.1 magrittr_2.0.3
## [3] magick_2.8.6 GenomicFeatures_1.58.0
## [5] farver_2.1.2 rmarkdown_2.29
## [7] BiocIO_1.16.0 zlibbioc_1.52.0
## [9] vctrs_0.6.5 memoise_2.0.1
## [11] Rsamtools_2.22.0 RCurl_1.98-1.17
## [13] tinytex_0.56 rstatix_0.7.2
## [15] htmltools_0.5.8.1 S4Arrays_1.6.0
## [17] BiocBaseUtils_1.8.0 lambda.r_1.2.4
## [19] curl_6.2.2 broom_1.0.7
## [21] Formula_1.2-5 SparseArray_1.6.2
## [23] sass_0.4.9 bslib_0.9.0
## [25] htmlwidgets_1.6.4 httr2_1.1.1
## [27] zoo_1.8-13 futile.options_1.0.1
## [29] cachem_1.1.0 commonmark_1.9.5
## [31] GenomicAlignments_1.42.0 mime_0.13
## [33] lifecycle_1.0.4 pkgconfig_2.0.3
## [35] Matrix_1.7-3 R6_2.6.1
## [37] fastmap_1.2.0 GenomeInfoDbData_1.2.13
## [39] shiny_1.10.0 digest_0.6.37
## [41] colorspace_2.1-1 RaggedExperiment_1.30.0
## [43] AnnotationDbi_1.68.0 ps_1.9.0
## [45] RSQLite_2.3.9 labeling_0.4.3
## [47] filelock_1.0.3 RTCGAToolbox_2.36.0
## [49] km.ci_0.5-6 RJSONIO_1.3-1.9
## [51] httr_1.4.7 abind_1.4-8
## [53] compiler_4.4.3 bit64_4.6.0-1
## [55] withr_3.0.2 backports_1.5.0
## [57] BiocParallel_1.40.0 carData_3.0-5
## [59] DBI_1.2.3 ggsignif_0.6.4
## [61] rappdirs_0.3.3 DelayedArray_0.32.0
## [63] rjson_0.2.23 tools_4.4.3
## [65] chromote_0.5.0 httpuv_1.6.15
## [67] glue_1.8.0 restfulr_0.0.15
## [69] promises_1.3.2 gridtext_0.1.5
## [71] grid_4.4.3 generics_0.1.3
## [73] gtable_0.3.6 KMsurv_0.1-5
## [75] tzdb_0.5.0 tidyr_1.3.1
## [77] websocket_1.4.2 data.table_1.17.0
## [79] hms_1.1.3 car_3.1-3
## [81] xml2_1.3.8 utf8_1.2.4
## [83] XVector_0.46.0 markdown_2.0
## [85] pillar_1.10.1 stringr_1.5.1
## [87] later_1.4.1 splines_4.4.3
## [89] ggtext_0.1.2 BiocFileCache_2.14.0
## [91] lattice_0.22-6 rtracklayer_1.66.0
## [93] bit_4.6.0 tidyselect_1.2.1
## [95] Biostrings_2.74.1 miniUI_0.1.1.1
## [97] knitr_1.50 gridExtra_2.3
## [99] litedown_0.6 bookdown_0.42
## [101] futile.logger_1.4.3 xfun_0.51
## [103] DT_0.33 stringi_1.8.4
## [105] UCSC.utils_1.2.0 yaml_2.3.10
## [107] evaluate_1.0.3 codetools_0.2-20
## [109] tibble_3.2.1 BiocManager_1.30.25
## [111] cli_3.6.4 xtable_1.8-4
## [113] munsell_0.5.1 processx_3.8.6
## [115] jquerylib_0.1.4 survMisc_0.5.6
## [117] Rcpp_1.0.14 GenomicDataCommons_1.30.1
## [119] dbplyr_2.5.0 png_0.1-8
## [121] XML_3.99-0.18 rapiclient_0.1.8
## [123] parallel_4.4.3 TCGAutils_1.26.0
## [125] readr_2.1.5 blob_1.2.4
## [127] bitops_1.0-9 scales_1.3.0
## [129] purrr_1.0.4 crayon_1.5.3
## [131] rlang_1.1.5 KEGGREST_1.46.0
## [133] rvest_1.0.4 formatR_1.14