@ARTICLE{Choi2007,
  author = {Hyungwon Choi and Ronglai Shen and Arul Chinnaiyan and Debashis Ghosh},
  title = {A Latent Variable Approach for Meta-Analysis of Gene Expression Data
	from Multiple Microarray Experiments},
  journal = {BMC Bioinformatics},
  year = {2007},
  volume = {8},
  pages = {364},
  number = {1},
  month = sep,
  abstract = {ABSTRACT: BACKGROUND: With the explosion in data generated using microarray
	technology by different investigators working on similar experiments,
	it is of interest to combine results across multiple studies. RESULTS:
	In this article, we describe a general probabilistic framework for
	combining high-throughput genomic data from several related microarray
	experiments using mixture models. A key feature of the model is the
	use of latent variables that represent quantities that can be combined
	across diverse platforms. We consider two methods for estimation
	of an index termed the probability of expression (POE). The first,
	reported in previous work by the authors, involves Markov Chain Monte
	Carlo (MCMC) techniques. The second method is a faster algorithm
	based on the expectation-maximization (EM) algorithm. The methods
	are illustrated with application to a meta-analysis of datasets for
	metastatic cancer. CONCLUSIONS: The statistical methods described
	in the paper are available as an R package, metaArray 1.7.1, which
	is at Bioconductor, whose URL is http://www.bioconductor.org/.},
  doi = {10.1186/1471-2105-8-364},
  owner = {rscharpf},
  pii = {1471-2105-8-364},
  pmid = {17900369},
  timestamp = {2007.12.05},
  url = {http://dx.doi.org/10.1186/1471-2105-8-364}
}

@ARTICLE{Cope2004,
  author = {Leslie Cope and Xiaogang Zhong and Elizabeth Garrett and Giovanni
	Parmigiani},
  title = {{MergeMaid}: {R} tools for merging and cross-study validation of
	gene expression data},
  journal = {Statistical Applications in Genetics and Molecular Biology},
  year = {2004},
  volume = {3},
  pages = {Article29},
  abstract = {Cross-study validation of gene expression investigations is critical
	in genomic analysis. We developed an R package and associated object
	definitions to merge and visualize multiple gene expression datasets.
	Our merging functions use arbitrary character IDs and generate objects
	that can efficiently support a variety of joint analyses. Visualization
	tools support exploration and cross-study validation of the data,
	without requiring normalization across platforms. Tools include ``integrative
	correlation'' plots that is, scatterplots of all pairwise correlations
	in one study against the corresponding pairwise correlations of another,
	both for individual genes and all genes combined. Gene-specific plots
	can be used to identify genes whose changes are reliably measured
	across studies. Visualizations also include scatterplots of gene-specific
	statistics quantifying relationships between expression and phenotypes
	of interest, using linear, logistic and Cox regression.},
  doi = {10.2202/1544-6115.1046},
  owner = {rscharpf},
  pmid = {16646808},
  timestamp = {2007.01.05},
  url = {http://dx.doi.org/10.2202/1544-6115.1046}
}

@ARTICLE{garb:etal:2001,
  author  = {Garber, Mitchell E.
	and Troyanskaya, Olga G.
	and Schluens, Karsten
	and Petersen, Simone
	and Thaesler, Zsuzsanna
	and Pacyna-Gengelbach, Manuela
	and van de Rijn, Matt
	and Rosen, Glenn D.
	and Perou, Charles M.
	and Whyte, Richard I.
	and Altman, Russ B.
	and Brown, Patrick O.
	and Botstein, David
	and Petersen, Iver},
  title   = {Diversity of gene expression in adenocarcinoma of the lung},
  journal = {Proceedings of the National Academy of Sciences USA},
  year    = {2001},
  volume  = {98},
  pages   = {13784--13789}
}

@ARTICLE{Garrett-Mayer2007,
  author = {Elizabeth Garrett-Mayer and Giovanni Parmigiani and Xiaogang Zhong
	and Leslie Cope and Edward Gabrielson},
  title = {Cross-study validation and combined analysis of gene expression microarray
	data},
  journal = {Biostatistics},
  year = {2007},
  month = sep,
  abstract = {Investigations of transcript levels on a genomic scale using hybridization-based
	arrays have led to formidable advances in our understanding of the
	biology of many human illnesses. At the same time, these investigations
	have generated controversy because of the probabilistic nature of
	the conclusions and the surfacing of noticeable discrepancies between
	the results of studies addressing the same biological question. In
	this article, we present simple and effective data analysis and visualization
	tools for gauging the degree to which the findings of one study are
	reproduced by others and for integrating multiple studies in a single
	analysis. We describe these approaches in the context of studies
	of breast cancer and illustrate that it is possible to identify a
	substantial biologically relevant subset of the human genome within
	which hybridization results are reliable. The subset generally varies
	with the platforms used, the tissues studied, and the populations
	being sampled. Despite important differences, it is also possible
	to develop simple expression measures that allow comparison across
	platforms, studies, laboratories and populations. Important biological
	signals are often preserved or enhanced. Cross-study validation and
	combination of microarray results requires careful, but not overly
	complex, statistical thinking and can become a routine component
	of genomic analysis.},
  doi = {10.1093/biostatistics/kxm033},
  owner = {gp},
  pii = {kxm033},
  pmid = {17873151},
  timestamp = {2007.09.29},
  url = {http://dx.doi.org/10.1093/biostatistics/kxm033}
}

@ARTICLE{Gentleman2004,
  author = {Robert C Gentleman and Vincent J Carey and Douglas M Bates and Ben
	Bolstad and Marcel Dettling and Sandrine Dudoit and Byron Ellis and
	Laurent Gautier and Yongchao Ge and Jeff Gentry and Kurt Hornik and
	Torsten Hothorn and Wolfgang Huber and Stefano Iacus and Rafael Irizarry
	and Friedrich Leisch and Cheng Li and Martin Maechler and Anthony
	J Rossini and Gunther Sawitzki and Colin Smith and Gordon Smyth and
	Luke Tierney and Jean Y H Yang and Jianhua Zhang},
  title = {{Bioconductor}: open software development for computational biology
	and bioinformatics},
  journal = {Genome Biology},
  year = {2004},
  volume = {5},
  pages = {R80},
  number = {10},
  abstract = {The Bioconductor project is an initiative for the collaborative creation
	of extensible software for computational biology and bioinformatics.
	The goals of the project include: fostering collaborative development
	and widespread use of innovative software, reducing barriers to entry
	into interdisciplinary scientific research, and promoting the achievement
	of remote reproducibility of research results. We describe details
	of our aims and methods, identify current challenges, compare Bioconductor
	to other open bioinformatics projects, and provide working examples.},
  doi = {10.1186/gb-2004-5-10-r80},
  keywords = {Computational Biology, Internet, Reproducibility of Results, Software},
  owner = {rscharpf},
  pii = {gb-2004-5-10-r80},
  pmid = {15461798},
  timestamp = {2007.01.09},
  url = {http://dx.doi.org/10.1186/gb-2004-5-10-r80}
}


@ARTICLE{Hong2006,
  author = {Fangxin Hong and Rainer Breitling and Connor W McEntee and Ben S
	Wittner and Jennifer L Nemhauser and Joanne Chory},
  title = {{RankProd}: a {Bioconductor} package for detecting differentially expressed
	genes in meta-analysis},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  pages = {2825--2827},
  number = {22},
  month = nov,
  abstract = {While meta-analysis provides a powerful tool for analyzing microarray
	experiments by combining data from multiple studies, it presents
	unique computational challenges. The Bioconductor package RankProd
	provides a new and intuitive tool for this purpose in detecting differentially
	expressed genes under two experimental conditions. The package modifies
	and extends the rank product method proposed by Breitling et al.,
	[(2004) FEBS Lett., 573, 83-92] to integrate multiple microarray
	studies from different laboratories and/or platforms. It offers several
	advantages over t-test based methods and accepts pre-processed expression
	datasets produced from a wide variety of platforms. The significance
	of the detection is assessed by a non-parametric permutation test,
	and the associated P-value and false discovery rate (FDR) are included
	in the output alongside the genes that are detected by user-defined
	criteria. A visualization plot is provided to view actual expression
	levels for each gene with estimated significance measurements. AVAILABILITY:
	RankProd is available at Bioconductor http://www.bioconductor.org.
	A web-based interface will soon be available at http://cactus.salk.edu/RankProd},
  doi = {10.1093/bioinformatics/btl476},
  keywords = {Computational Biology; DNA, Complementary; Data Interpretation, Statistical;
	False Positive Reactions; Gene Expression Profiling; Gene Expression
	Regulation; Internet; Meta-Analysis as Topic; Metabolism; Models,
	Statistical; Oligonucleotide Array Sequence Analysis; Plant Proteins;
	Proteomics; Reproducibility of Results; Software},
  owner = {rscharpf},
  pii = {btl476},
  pmid = {16982708},
  timestamp = {2007.12.05},
  url = {http://dx.doi.org/10.1093/bioinformatics/btl476}
}

  @MANUAL{Lusa2007,
    title = {{GeneMeta}: {MetaAnalysis} for High Throughput Experiments},
    author = {Lara Lusa and R. Gentleman and M. Ruschhaupt},
    year = {2007},
    note = {R package version 1.11.0}
  }

@ARTICLE{Parmigiani2004,
  author = {Giovanni Parmigiani and Elizabeth Garrett and Ramaswamy Anbazhagan
	and Edward Gabrielson},
  title = {Molecular classification of lung cancer: a cross-platform comparison
	of gene expression data sets},
  journal = {Chest},
  year = {2004},
  volume = {125},
  pages = {103S},
  number = {5 Suppl},
  month = may,
  keywords = {Adenocarcinoma, Carcinoma, Gene Expression, Gene Expression Profiling,
	Humans, Lung Neoplasms, Proportional Hazards Models, Squamous Cell},
  owner = {rscharpf},
  pmid = {15136439},
  timestamp = {2007.03.07}
}

@MANUAL{Plummer2007,
  title = {{coda}: Output analysis and diagnostics for {MCMC}},
  author = {Martyn Plummer and Nicky Best and Kate Cowles and Karen Vines},
  year = {2007},
  note = {R package version 0.12-1}
}

@ARTICLE{Scharpf2009,
  author = {Scharpf, Robert B. and Tjelmeland, H{\aa}kon and Parmigiani, Giovanni
	and Nobel, Andrew},
  title = {A {Bayesian} model for cross-study differential gene expression},
  journal = {Journal of the American Statistical Association},
  year = {2009},
  note = {To appear}
}


@ARTICLE{Parmigiani2004a,
  author = {Giovanni Parmigiani and Elizabeth S Garrett-Mayer and Ramaswamy Anbazhagan
	and Edward Gabrielson},
  title = {A cross-study comparison of gene expression studies for the molecular
	classification of lung cancer},
  journal = {Clinical Cancer Research},
  year = {2004},
  volume = {10},
  pages = {2922--2927},
  number = {9},
  month = may,
  abstract = {PURPOSE: Recent studies sought to refine lung cancer classification
	using gene expression microarrays. We evaluate the extent to which
	these studies agree and whether results can be integrated. EXPERIMENTAL
	DESIGN: We developed a practical analysis plan for cross-study comparison,
	validation, and integration of cancer molecular classification studies
	using public data. We evaluated genes for cross-platform consistency
	of expression patterns, using integrative correlations, which quantify
	cross-study reproducibility without relying on direct assimilation
	of expression measurements across platforms. We then compared associations
	of gene expression levels to differential diagnosis of squamous cell
	carcinoma versus adenocarcinoma via reproducibility of the gene-specific
	t statistics and to survival via reproducibility of Cox coefficients.
	RESULTS: Integrative correlation analysis revealed a large proportion
	of genes in which the patterns agreed across studies more than would
	be expected by chance. Correlation of t statistics for diagnosis
	of squamous cell carcinoma versus adenocarcinoma is high (0.85) and
	increases (0.925) when using only the most consistent genes identified
	by integrative correlation. Correlations of Cox coefficients ranged
	from 0.13 to 0.31 (0.33-0.49 with genes selected for consistency).
	Although we find genes that are significant in multiple studies but
	show discordant effects, their number is approximately that expected
	by chance. We report genes that are reproducible by integrative analysis,
	significant in all studies, and concordant in effect. CONCLUSIONS:
	Cross-study comparison revealed significant, albeit incomplete, agreement
	of gene expression patterns related to lung cancer biology and identified
	genes that reproducibly predict outcomes. This analysis approach
	is broadly applicable to cross-study comparisons of gene expression
	profiling projects.},
  keywords = {Adenocarcinoma, Carcinoma, Diagnosis, Differential, Gene Expression
	Profiling, Gene Expression Regulation, Humans, Lung Neoplasms, Neoplastic,
	Reproducibility of Results, Squamous Cell},
  owner = {rscharpf},
  pmid = {15131026},
  timestamp = {2007.03.07}
}