%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/

%% Created for Luigi Marchionni at 2014-04-14 10:26:13 -0400

%% Saved with string encoding Unicode (UTF-8)

%% Contents: references on top-scoring-pair (TSP / k-TSP) rank-based
%% gene-expression classifiers and on the 70-gene breast cancer signature.
%% Text outside entries is ignored by BibTeX, so these lines act as comments.

%% ICMLA08: the proceedings name is stored in Booktitle (required for this
%% entry type); author names use the "Last, First" form joined by " and ".
@inproceedings{ICMLA08,
	Author = {Geman, Donald and Afsari, Bahman and Tan, Aik Choon and Naiman, Daniel Q},
	Booktitle = {Proceedings ICMLA},
	Date-Added = {2014-04-14 14:26:12 +0000},
	Date-Modified = {2014-04-14 14:26:12 +0000},
	Note = {(Winner, ICMLA Microarray Classification Algorithm Competition)},
	Title = {Microarray classification from several two-gene expression comparisons},
	Year = {2008}}

%% iTSP: Year holds a publication-status string rather than a four-digit
%% year; replace it once the final publication year is known.
@article{iTSP,
	Author = {Wang, Yuliang and Afsari, Bahman and Geman, Donald and Price, Nathan},
	Date-Added = {2014-04-14 14:26:12 +0000},
	Date-Modified = {2014-04-14 14:26:12 +0000},
	Journal = {PLOS ONE},
	Title = {Relative {mRNA} Levels of Functionally Interacting Proteins Are Consistent Disease Molecular Signatures},
	Year = {Under revision}}

%% KTSPAOAS: Year holds a publication-status string rather than a four-digit
%% year; replace it once the final publication year is known.
@article{KTSPAOAS,
	Author = {Afsari, Bahman and Braga-Neto, Ulisses and Geman, Donald},
	Date-Added = {2014-04-14 14:26:12 +0000},
	Date-Modified = {2014-04-14 14:26:12 +0000},
	Journal = {Annals of Applied Statistics},
	Title = {Rank discriminants for predicting phenotypes from {RNA} expression},
	Year = {to appear}}

@article{Buyse:2006aa,
	Abstract = {BACKGROUND: A 70-gene signature was previously shown to have prognostic value in patients with node-negative breast cancer. Our goal was to validate the signature in an independent group of patients. METHODS: Patients (n = 307, with 137 events after a median follow-up of 13.6 years) from five European centers were divided into high- and low-risk groups based on the gene signature classification and on clinical risk classifications. Patients were assigned to the gene signature low-risk group if their 5-year distant metastasis-free survival probability as estimated by the gene signature was greater than 90%. Patients were assigned to the clinicopathologic low-risk group if their 10-year survival probability, as estimated by Adjuvant! software, was greater than 88% (for estrogen receptor [ER]-positive patients) or 92% (for ER-negative patients). Hazard ratios (HRs) were estimated to compare time to distant metastases, disease-free survival, and overall survival in high- versus low-risk groups. RESULTS: The 70-gene signature outperformed the clinicopathologic risk assessment in predicting all endpoints. For time to distant metastases, the gene signature yielded HR = 2.32 (95% confidence interval [CI] = 1.35 to 4.00) without adjustment for clinical risk and hazard ratios ranging from 2.13 to 2.15 after adjustment for various estimates of clinical risk; clinicopathologic risk using Adjuvant! software yielded an unadjusted HR = 1.68 (95% CI = 0.92 to 3.07). For overall survival, the gene signature yielded an unadjusted HR = 2.79 (95% CI = 1.60 to 4.87) and adjusted hazard ratios ranging from 2.63 to 2.89; clinicopathologic risk yielded an unadjusted HR = 1.67 (95% CI = 0.93 to 2.98). For patients in the gene signature high-risk group, 10-year overall survival was 0.69 for patients in both the low- and high-clinical risk groups; for patients in the gene signature low-risk group, the 10-year survival rates were 0.88 and 0.89, respectively. CONCLUSIONS: The 70-gene signature adds independent prognostic information to clinicopathologic risk assessment for patients with early breast cancer.},
	Author = {Buyse, Marc and Loi, Sherene and van't Veer, Laura and Viale, Giuseppe and Delorenzi, Mauro and Glas, Annuska M and d'Assignies, Mahasti Saghatchian and Bergh, Jonas and Lidereau, Rosette and Ellis, Paul and Harris, Adrian and Bogaerts, Jan and Therasse, Patrick and Floore, Arno and Amakrane, Mohamed and Piette, Fanny and Rutgers, Emiel and Sotiriou, Christos and Cardoso, Fatima and Piccart, Martine J and {TRANSBIG Consortium}},
	Date-Added = {2014-03-20 05:31:44 +0000},
	Date-Modified = {2014-03-20 05:31:44 +0000},
	Doi = {10.1093/jnci/djj329},
	Journal = {J Natl Cancer Inst},
	Journal-Full = {Journal of the National Cancer Institute},
	Mesh = {Adult; Breast Neoplasms; Disease-Free Survival; Europe; Female; Follow-Up Studies; Gene Expression Profiling; Humans; Lymphatic Metastasis; Middle Aged; Neoplasm Staging; Odds Ratio; Oligonucleotide Array Sequence Analysis; Predictive Value of Tests; Prognosis; ROC Curve; Reproducibility of Results; Risk Assessment; Risk Factors; Sensitivity and Specificity; Survival Analysis; Tumor Markers, Biological},
	Month = sep,
	Number = {17},
	Pages = {1183--1192},
	Pmid = {16954471},
	Pst = {ppublish},
	Title = {Validation and clinical utility of a 70-gene prognostic signature for women with node-negative breast cancer},
	Volume = {98},
	Year = {2006},
	Bdsk-Url-1 = {http://dx.doi.org/10.1093/jnci/djj329}}

@article{Glas:2006aa,
	Abstract = {BACKGROUND: A 70-gene tumor expression profile was established as a powerful predictor of disease outcome in young breast cancer patients. This profile, however, was generated on microarrays containing 25,000 60-mer oligonucleotides that are not designed for processing of many samples on a routine basis. RESULTS: To facilitate its use in a diagnostic setting, the 70-gene prognosis profile was translated into a customized microarray (MammaPrint) containing a reduced set of 1,900 probes suitable for high throughput processing. RNA of 162 patient samples from two previous studies was subjected to hybridization to this custom array to validate the prognostic value. Classification results obtained from the original analysis were then compared to those generated using the algorithms based on the custom microarray and showed an extremely high correlation of prognosis prediction between the original data and those generated using the custom mini-array (p < 0.0001). CONCLUSION: In this report we demonstrate for the first time that microarray technology can be used as a reliable diagnostic tool. The data clearly demonstrate the reproducibility and robustness of the small custom-made microarray. The array is therefore an excellent tool to predict outcome of disease in breast cancer patients.},
	Author = {Glas, Annuska M and Floore, Arno and Delahaye, Leonie J M J and Witteveen, Anke T and Pover, Rob C F and Bakx, Niels and Lahti-Domenici, Jaana S T and Bruinsma, Tako J and Warmoes, Marc O and Bernards, Ren{\'e} and Wessels, Lodewyk F A and Van't Veer, Laura J},
	Date-Added = {2014-03-20 05:31:08 +0000},
	Date-Modified = {2014-03-20 05:31:08 +0000},
	Doi = {10.1186/1471-2164-7-278},
	Journal = {BMC Genomics},
	Journal-Full = {BMC genomics},
	Mesh = {Breast Neoplasms; Female; Gene Expression Profiling; Humans; Kaplan-Meier Estimate; Middle Aged; Oligonucleotide Array Sequence Analysis; Prognosis; Reproducibility of Results; Sensitivity and Specificity; Tumor Markers, Biological},
	Pages = {278},
	Pmc = {PMC1636049},
	Pmid = {17074082},
	Pst = {epublish},
	Title = {Converting a breast cancer microarray signature into a high-throughput diagnostic test},
	Volume = {7},
	Year = {2006},
	Bdsk-Url-1 = {http://dx.doi.org/10.1186/1471-2164-7-278}}

@article{Tan:2005aa,
	Abstract = {MOTIVATION: Various studies have shown that cancer tissue samples can be successfully detected and classified by their gene expression patterns using machine learning approaches. One of the challenges in applying these techniques for classifying gene expression data is to extract accurate, readily interpretable rules providing biological insight as to how classification is performed. Current methods generate classifiers that are accurate but difficult to interpret. This is the trade-off between credibility and comprehensibility of the classifiers. Here, we introduce a new classifier in order to address these problems. It is referred to as k-TSP (k-Top Scoring Pairs) and is based on the concept of 'relative expression reversals'. This method generates simple and accurate decision rules that only involve a small number of gene-to-gene expression comparisons, thereby facilitating follow-up studies. RESULTS: In this study, we have compared our approach to other machine learning techniques for class prediction in 19 binary and multi-class gene expression datasets involving human cancers. The k-TSP classifier performs as efficiently as Prediction Analysis of Microarray and support vector machine, and outperforms other learning methods (decision trees, k-nearest neighbour and na{\"\i}ve Bayes). Our approach is easy to interpret as the classifier involves only a small number of informative genes. For these reasons, we consider the k-TSP method to be a useful tool for cancer classification from microarray gene expression data. AVAILABILITY: The software and datasets are available at http://www.ccbm.jhu.edu CONTACT: actan@jhu.edu.},
	Author = {Tan, Aik Choon and Naiman, Daniel Q and Xu, Lei and Winslow, Raimond L and Geman, Donald},
	Date-Added = {2014-03-20 05:15:05 +0000},
	Date-Modified = {2014-03-20 05:15:05 +0000},
	Doi = {10.1093/bioinformatics/bti631},
	Journal = {Bioinformatics},
	Journal-Full = {Bioinformatics (Oxford, England)},
	Mesh = {Algorithms; Artificial Intelligence; Cluster Analysis; Diagnosis, Computer-Assisted; Gene Expression Profiling; Humans; Neoplasm Proteins; Neoplasms; Pattern Recognition, Automated; Reproducibility of Results; Sensitivity and Specificity; Tumor Markers, Biological},
	Month = oct,
	Number = {20},
	Pages = {3896--3904},
	Pmc = {PMC1987374},
	Pmid = {16105897},
	Pst = {ppublish},
	Title = {Simple decision rules for classifying human cancers from gene expression profiles},
	Volume = {21},
	Year = {2005},
	Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/bti631}}

@article{Xu:2005aa,
	Abstract = {MOTIVATION: DNA microarray data analysis has been used previously to identify marker genes which discriminate cancer from normal samples. However, due to the limited sample size of each study, there are few common markers among different studies of the same cancer. With the rapid accumulation of microarray data, it is of great interest to integrate inter-study microarray data to increase sample size, which could lead to the discovery of more reliable markers. RESULTS: We present a novel, simple method of integrating different microarray datasets to identify marker genes and apply the method to prostate cancer datasets. In this study, by applying a new statistical method, referred to as the top-scoring pair (TSP) classifier, we have identified a pair of robust marker genes (HPN and STAT6) by integrating microarray datasets from three different prostate cancer studies. Cross-platform validation shows that the TSP classifier built from the marker gene pair, which simply compares relative expression values, achieves high accuracy, sensitivity and specificity on independent datasets generated using various array platforms. Our findings suggest a new model for the discovery of marker genes from accumulated microarray data and demonstrate how the great wealth of microarray data can be exploited to increase the power of statistical analysis. CONTACT: leixu@jhu.edu.},
	Author = {Xu, Lei and Tan, Aik Choon and Naiman, Daniel Q and Geman, Donald and Winslow, Raimond L},
	Date-Added = {2014-03-20 05:15:04 +0000},
	Date-Modified = {2014-03-20 05:15:04 +0000},
	Doi = {10.1093/bioinformatics/bti647},
	Journal = {Bioinformatics},
	Journal-Full = {Bioinformatics (Oxford, England)},
	Mesh = {Algorithms; Artificial Intelligence; Cluster Analysis; Diagnosis, Computer-Assisted; Gene Expression Profiling; Humans; Male; Neoplasm Proteins; Pattern Recognition, Automated; Prostatic Neoplasms; Reproducibility of Results; Sensitivity and Specificity; Systems Integration; Tumor Markers, Biological},
	Month = oct,
	Number = {20},
	Pages = {3905--3911},
	Pmid = {16131522},
	Pst = {ppublish},
	Title = {Robust prostate cancer marker genes emerge from direct integration of inter-study microarray data},
	Volume = {21},
	Year = {2005},
	Bdsk-Url-1 = {http://dx.doi.org/10.1093/bioinformatics/bti647}}

@article{Geman:2004aa,
	Abstract = {We present a new approach to molecular classification based on mRNA comparisons. Our method, referred to as the top-scoring pair(s) (TSP) classifier, is motivated by current technical and practical limitations in using gene expression microarray data for class prediction, for example to detect disease, identify tumors or predict treatment response. Accurate statistical inference from such data is difficult due to the small number of observations, typically tens, relative to the large number of genes, typically thousands. Moreover, conventional methods from machine learning lead to decisions which are usually very difficult to interpret in simple or biologically meaningful terms. In contrast, the TSP classifier provides decision rules which i) involve very few genes and only relative expression values (e.g., comparing the mRNA counts within a single pair of genes); ii) are both accurate and transparent; and iii) provide specific hypotheses for follow-up studies. In particular, the TSP classifier achieves prediction rates with standard cancer data that are as high as those of previous studies which use considerably more genes and complex procedures. Finally, the TSP classifier is parameter-free, thus avoiding the type of over-fitting and inflated estimates of performance that result when all aspects of learning a predictor are not properly cross-validated.},
	Author = {Geman, Donald and d'Avignon, Christian and Naiman, Daniel Q and Winslow, Raimond L},
	Date-Added = {2014-03-20 05:15:03 +0000},
	Date-Modified = {2014-03-20 05:15:03 +0000},
	Doi = {10.2202/1544-6115.1071},
	Journal = {Stat Appl Genet Mol Biol},
	Journal-Full = {Statistical applications in genetics and molecular biology},
	Pages = {Article19},
	Pmc = {PMC1989150},
	Pmid = {16646797},
	Pst = {ppublish},
	Title = {Classifying gene expression profiles from pairwise {mRNA} comparisons},
	Volume = {3},
	Year = {2004},
	Bdsk-Url-1 = {http://dx.doi.org/10.2202/1544-6115.1071}}

@article{Marchionni:2013aa,
	Abstract = {BACKGROUND: A small number of prognostic and predictive tests based on gene expression are currently offered as reference laboratory tests. In contrast to such success stories, a number of flaws and errors have recently been identified in other genomic-based predictors and the success rate for developing clinically useful genomic signatures is low. These errors have led to widespread concerns about the protocols for conducting and reporting of computational research. As a result, a need has emerged for a template for reproducible development of genomic signatures that incorporates full transparency, data sharing and statistical robustness. RESULTS: Here we present the first fully reproducible analysis of the data used to train and test MammaPrint, an FDA-cleared prognostic test for breast cancer based on a 70-gene expression signature. We provide all the software and documentation necessary for researchers to build and evaluate genomic classifiers based on these data. As an example of the utility of this reproducible research resource, we develop a simple prognostic classifier that uses only 16 genes from the MammaPrint signature and is equally accurate in predicting 5-year disease free survival. CONCLUSIONS: Our study provides a prototypic example for reproducible development of computational algorithms for learning prognostic biomarkers in the era of personalized medicine.},
	Author = {Marchionni, Luigi and Afsari, Bahman and Geman, Donald and Leek, Jeffrey T},
	Date-Added = {2014-03-20 05:15:02 +0000},
	Date-Modified = {2014-03-20 05:15:02 +0000},
	Doi = {10.1186/1471-2164-14-336},
	Journal = {BMC Genomics},
	Journal-Full = {BMC genomics},
	Mesh = {Breast Neoplasms; Cohort Studies; Computational Biology; Gene Expression Profiling; Humans; Prognosis; Reproducibility of Results; Software},
	Pages = {336},
	Pmc = {PMC3662649},
	Pmid = {23682826},
	Pst = {epublish},
	Title = {A simple and reproducible breast cancer prognostic test},
	Volume = {14},
	Year = {2013},
	Bdsk-Url-1 = {http://dx.doi.org/10.1186/1471-2164-14-336}}