A quick overview of the S4 class system

Hervé Pagès

June 2016

What is S4?

The S4 class system

> sessionInfo()
...
attached base packages:
[1] stats     graphics  grDevices utils     datasets
[6] methods   base

A different world

The syntax

foo(x, ...)

not:

x.foo(...)

like in other OO programming languages.

The central concepts

The result

> ls('package:methods')
  [1] "addNextMethod"                   "allGenerics"
  [3] "allNames"                        "Arith"
  [5] "as"                              "as<-"
  [7] "asMethodDefinition"              "assignClassDef"
...
[211] "testVirtual"                     "traceOff"
[213] "traceOn"                         "tryNew"
[215] "unRematchDefinition"             "validObject"
[217] "validSlotNames"

S4 in Bioconductor

S4 from an end-user point of view

Where do S4 objects come from?

From a dataset

library(graph)
data(apopGraph)
apopGraph
## A graphNEL graph with directed edges
## Number of Nodes = 50 
## Number of Edges = 59

From using an object constructor function

library(IRanges)
IRanges(start=c(101, 25), end=c(110, 80))
## IRanges object with 2 ranges and 0 metadata columns:
##           start       end     width
##       <integer> <integer> <integer>
##   [1]       101       110        10
##   [2]        25        80        56

From a coercion

library(Matrix)
m <- matrix(3:-4, nrow=2)
as(m, "Matrix")
## 2 x 4 Matrix of class "dgeMatrix"
##      [,1] [,2] [,3] [,4]
## [1,]    3    1   -1   -3
## [2,]    2    0   -2   -4

From using a specialized high-level constructor

library(GenomicFeatures)
makeTxDbFromUCSC("sacCer2", tablename="ensGene")
TxDb object:
# Db type: TxDb
# Supporting package: GenomicFeatures
# Data source: UCSC
# Genome: sacCer2
# Organism: Saccharomyces cerevisiae
# Taxonomy ID: 4932
# UCSC Table: ensGene
# UCSC Track: Ensembl Genes
...

From using a high-level I/O function

library(ShortRead)
path_to_my_data <- system.file(
    package="ShortRead",
    "extdata", "Data", "C1-36Firecrest", "Bustard", "GERALD")
lane1 <- readFastq(path_to_my_data, pattern="s_1_sequence.txt")
lane1
## class: ShortReadQ
## length: 256 reads; width: 36 cycles

Inside another object

sread(lane1)
## DNAStringSet object of length 256:
##       width seq
##   [1]    36 GGACTTTGTAGGATACCCTCGCTTTCCTTCTCCTGT
##   [2]    36 GATTTCTTACCTATTAGTGGTTGAACAGCATCGGAC
##   [3]    36 GCGGTGGTCTATAGTGTTATTAATATCAATTTGGGT
##   [4]    36 GTTACCATGATGTTATTTCTTCATTTGGAGGTAAAA
##   [5]    36 GTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTT
##   ...   ... ...
## [252]    36 GTTTAGATATGAGTCACATTTTGTTCATGGTAGAGT
## [253]    36 GTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
## [254]    36 GATGAACTAAGTCAACCTCAGCACTAACCTTGCGAG
## [255]    36 GTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTA
## [256]    36 GCAATCTGCCGACCACTCGCGATTCAATCATGACTT

How to manipulate S4 objects?

Low-level: getters and setters

ir <- IRanges(start=c(101, 25), end=c(110, 80))
width(ir)
## [1] 10 56
width(ir) <- width(ir) - 5
ir
## IRanges object with 2 ranges and 0 metadata columns:
##           start       end     width
##       <integer> <integer> <integer>
##   [1]       101       105         5
##   [2]        25        75        51

High-level: plenty of specialized methods

qa1 <- qa(lane1, lane="lane1")
class(qa1)
## [1] "ShortReadQQA"
## attr(,"package")
## [1] "ShortRead"

How to find the right man page?

showMethods("qa")
## Function: qa (package ShortRead)
## dirPath="ShortReadQ"
## dirPath="SolexaPath"
## dirPath="character"
## dirPath="list"

Inspecting objects and discovering methods

class(lane1)
## [1] "ShortReadQ"
## attr(,"package")
## [1] "ShortRead"
showClass("ShortReadQ")
## Class "ShortReadQ" [package "ShortRead"]
## 
## Slots:
##                                              
## Name:       quality        sread           id
## Class: QualityScore DNAStringSet   BStringSet
## 
## Extends: 
## Class "ShortRead", directly
## Class ".ShortReadBase", by class "ShortRead", distance 2
## 
## Known Subclasses: "AlignedRead"

Implementing an S4 class (in 4 slides)

Class definition and constructor

Class definition

setClass("SNPLocations",
    slots=c(
      genome="character",  # a single string
      snpid="character",   # a character vector of length N
      chrom="character",   # a character vector of length N
      pos="integer"        # an integer vector of length N
    )
)

Constructor

SNPLocations <- function(genome, snpid, chrom, pos)
    new("SNPLocations", genome=genome, snpid=snpid, chrom=chrom, pos=pos)
snplocs <- SNPLocations("hg19",
             c("rs0001", "rs0002"),
             c("chr1", "chrX"),
             c(224033L, 1266886L))

Getters

Defining the length method

setMethod("length", "SNPLocations", function(x) length(x@snpid))
length(snplocs)  # just testing
## [1] 2

Defining the slot getters

setGeneric("genome", function(x) standardGeneric("genome"))
setMethod("genome", "SNPLocations", function(x) x@genome)
setGeneric("snpid", function(x) standardGeneric("snpid"))
setMethod("snpid", "SNPLocations", function(x) x@snpid)
setGeneric("chrom", function(x) standardGeneric("chrom"))
setMethod("chrom", "SNPLocations", function(x) x@chrom)
setGeneric("pos", function(x) standardGeneric("pos"))
setMethod("pos", "SNPLocations", function(x) x@pos)
genome(snplocs)  # just testing
## [1] "hg19"
snpid(snplocs)   # just testing
## [1] "rs0001" "rs0002"

Defining the show method

setMethod("show", "SNPLocations",
    function(object)
        cat(class(object), "instance with", length(object),
            "SNPs on genome", genome(object), "\n")
)
snplocs  # just testing
## SNPLocations instance with 2 SNPs on genome hg19

Defining the validity method

setValidity("SNPLocations",
    function(object) {
        if (!is.character(genome(object)) ||
            length(genome(object)) != 1 || is.na(genome(object)))
            return("'genome' slot must be a single string")
        slot_lengths <- c(length(snpid(object)),
                          length(chrom(object)),
                          length(pos(object)))
        if (length(unique(slot_lengths)) != 1)
            return("lengths of slots 'snpid', 'chrom' and 'pos' differ")
        TRUE
    }
)
snplocs@chrom <- LETTERS[1:3]  # a very bad idea!
validObject(snplocs)
## Error in validObject(snplocs): invalid class "SNPLocations" object: lengths of slots 'snpid', 'chrom' and 'pos' differ

Defining slot setters

setGeneric("chrom<-", function(x, value) standardGeneric("chrom<-"))
setReplaceMethod("chrom", "SNPLocations",
    function(x, value) {x@chrom <- value; validObject(x); x})
chrom(snplocs) <- LETTERS[1:2]  # repair currently broken object
chrom(snplocs) <- LETTERS[1:3]  # try to break it again
## Error in validObject(x): invalid class "SNPLocations" object: lengths of slots 'snpid', 'chrom' and 'pos' differ

Defining a coercion method

setAs("SNPLocations", "data.frame",
    function(from)
        data.frame(snpid=snpid(from), chrom=chrom(from), pos=pos(from))
)
as(snplocs, "data.frame")  # testing
##    snpid chrom     pos
## 1 rs0001     A  224033
## 2 rs0002     B 1266886

Extending an existing class

Slot inheritance

setClass("AnnotatedSNPs",
    contains="SNPLocations",
    slots=c(
        geneid="character"  # a character vector of length N
    )
)
showClass("AnnotatedSNPs")
## Class "AnnotatedSNPs" [in ".GlobalEnv"]
## 
## Slots:
##                                                         
## Name:     geneid    genome     snpid     chrom       pos
## Class: character character character character   integer
## 
## Extends: "SNPLocations"
AnnotatedSNPs <- function(genome, snpid, chrom, pos, geneid)
{
    new("AnnotatedSNPs",
        SNPLocations(genome, snpid, chrom, pos),
        geneid=geneid)
}

Method inheritance

snps <- AnnotatedSNPs("hg19",
             c("rs0001", "rs0002"),
             c("chr1", "chrX"),
             c(224033L, 1266886L),
             c("AAU1", "SXW-23"))
snps
## AnnotatedSNPs instance with 2 SNPs on genome hg19
as(snps, "data.frame")  # the 'geneid' slot is ignored
##    snpid chrom     pos
## 1 rs0001  chr1  224033
## 2 rs0002  chrX 1266886

is(snps, "AnnotatedSNPs")     # 'snps' is an AnnotatedSNPs object
## [1] TRUE
is(snps, "SNPLocations")      # and is also a SNPLocations object
## [1] TRUE
class(snps)                   # but is *not* a SNPLocations *instance*
## [1] "AnnotatedSNPs"
## attr(,"package")
## [1] ".GlobalEnv"
as(snps, "SNPLocations")
## SNPLocations instance with 2 SNPs on genome hg19

Incremental validity method

setValidity("AnnotatedSNPs",
    function(object) {
        if (length(object@geneid) != length(object))
            return("'geneid' slot must have the length of the object")
        TRUE
    }
)

What else?

Other important S4 features

Resources