\name{mergeSAGE}
\alias{mergeSAGE}
\alias{getLibInfo}
\alias{calNormFact}
\alias{getLibNNum}
\alias{getUniqTags}
\alias{writeSAGE4Win}
\alias{writeSAGE4Unix}
\alias{mapFile2Tag}
\alias{writeSAGECounts}
\alias{writeSAGE2DB}
\alias{getColSQL}
\title{Functions to merge SAGE libraries based on unique SAGE tags}
\description{
  These functions merge individual SAGE libraries based on unique SAGE
  tags and write the merged data into a file and a table in a database
  with the unique SAGE tags as one column and counts from all the
  libraries as the others.  
}
\usage{
mergeSAGE(libNames, isDir = TRUE,  skip = 1, pattern = ".sage")
getLibInfo(fileNames)
calNormFact(normalize = c("min", "max"), libNNum)
getLibNNum(fileNames)
getUniqTags(fileNames, skip = 1, sep = "\t")
writeSAGE4Win(fileNames, uniqTags, infoData, pace = 1000)
mapFile2Tag(fileNames, tags, skip, n)
writeSAGECounts(fileNames, uniqTags, skip, sep = "\t")
writeSAGE2DB(dbArgs, colNames, keys, numCols, fileName, what =
c("counts", "map", "info"), charNum = 20, type = "int4")
getColSQL(colNames, charNum, keys, numCols, type)
writeSAGE4Unix(countData, infoData)
}

\arguments{
  \item{libNames}{\code{libNames} - a vector of character strings for
    the names of the SAGE libraries to be merged. \code{libNames} can be
    the name of the directory containing SAGE libraries to be merged}
  \item{isDir}{\code{isDir} - a boolean that is TRUE if libNames is the
    name for the directory that contains SAGE libraries to be merged}
  \item{skip}{\code{skip} - an integer for the number of lines to be
    skipped when the libraries are merged}
  \item{pattern}{\code{pattern} - a character string for the pattern to
    be used to get the SAGE data files from the directory when
    \code{libNames} is for a directory. Only files that match the
    pattern will be merged}
  \item{fileNames}{\code{fileNames} a vector of character strings for
    SAGE libraries to be written to DB or used for analysis}
  \item{normalize}{\code{normalize} a character string giving the name of
    a function for normalization}
  \item{libNNum}{\code{libNNum} a matrix with columns for SAGE library
    names and maximum and minimum number of counts}
  \item{uniqTags}{\code{uniqTags} a vector of character strings for the
    unique SAGE tags}
  \item{infoData}{\code{infoData} a matrix containing SAGE library
    information data}
  \item{pace}{\code{pace} an integer for the maximum number of SAGE tags
    to be processed each run when writing SAGE library data to database
    under Windows}
  \item{tags}{\code{tags} a vector of character strings of SAGE tags}
  \item{n}{\code{n} an integer for the number of neighbors defined for
    KNN}
  \item{sep}{\code{sep} a character string for the separator used}
  \item{dbArgs}{\code{dbArgs} a list containing arguments for making
    database connections}
  \item{colNames}{\code{colNames} a vector of character strings for the
    names of columns of a matrix}
  \item{keys}{\code{keys} a vector of character strings for the names of
    key columns of a database}
  \item{numCols}{\code{numCols} see \code{ncol}}
  \item{fileName}{\code{fileName} a character string for the name of a
    file to be used to populate a database}
  \item{what}{\code{what} a character string that can be either
    'counts', 'map', or 'info' to indicate what SAGE data to deal with}
  \item{charNum}{\code{charNum} an integer indicating the number of
    characters for the length of character columns in a database}
  \item{type}{\code{type} a character string for the data type of a
    database column}
  \item{countData}{\code{countData} a matrix containing tag counts for
    SAGE libraries}
}

\details{
  Each SAGE library typically contains two columns with the first one
  being SAGE tags and the second one being their
  counts. \code{\link{mergeSAGE}} merges library files based on the
  tags. Tags that are missing from a given library but exist in others
  will be assigned 0s for the library.

  \code{\link{mergeSAGE}} will generate two files. One contains the
  merged data and the other contains four columns with the first one
  being the column names of the database table to store the SAGE counts,
  the second one being the original SAGE library names, the third being
  the normalization factor that will be used to normalize counts based
  on the library with the smallest number of tags, and the fourth being
  the factor based on the library with the largest number of tags.

  \code{\link{getLibInfo}} creates the file that contains the
  information about the data file.

  \code{\link{calNormFact}} calculates the normalization factor.

}
\value{
  \code{\link{mergeSAGE}} returns a list containing two file names
  \item{data}{a character string for the name of the file containing the
    merged data} 
  \item{info}{a character string for the name of the file containing
    information about the merged data}

  \code{\link{getLibInfo}} returns a matrix with four columns.
}
\references{\url{http://www.ncbi.nlm.nih.gov/geo}} 
\author{Jianhua Zhang}
 
\seealso{\code{\link{SAGELyzer}}}
\examples{
path <- tempdir()
# Create two libraries
lib1 <- cbind(paste("tag", 1:10, sep = ""), 1:10)
lib2 <- cbind(paste("tag", 5:9, sep = ""), 15:19)
write.table(lib1, file = file.path(path, "lib1.sage"), sep = "\t",
row.names = FALSE, col.names = FALSE)
write.table(lib2, file = file.path(path, "lib2.sage"), sep = "\t",
row.names = FALSE, col.names = FALSE) 
libNNum <- getLibNNum(c(file.path(path, "lib1.sage"),
file.path(path, "lib2.sage")))
normFact <- calNormFact("min", libNNum)
uniqTag <- getUniqTags(c(file.path(path, "lib1.sage"),
file.path(path, "lib2.sage")), skip = 0)
}

\keyword{manip}