Reading Perseus data in R

As a list

The most basic way to import data from Perseus into R is as a list, whose elements can then be used in any sort of workflow.

library(PerseusR)

# Locate the example matrices shipped in the package's 'extdata' folder.
dataFolder <- system.file('extdata', package = 'PerseusR')
# Escape the dot and anchor the end of the name so that only files called
# "matrix<digits>.txt" match (an unescaped "." matches any character).
dataFiles <- list.files(dataFolder,
                        pattern = "matrix[[:digit:]]*\\.txt$",
                        full.names = TRUE)
# Fail with a clear condition instead of a subscript error if nothing matched.
stopifnot(length(dataFiles) > 0)
dataFile <- dataFiles[[1]]

# Read the example file with the default reader; the class() output below
# shows that the result is a plain list.
default_output <- read.perseus.default(dataFile)
class(default_output)
#> [1] "list"
# Print every element of the list (output follows).
print(default_output)
#> $main
#>         Column_1   Column_2   Column_3
#> Row.1 -1.8605740 -0.3910594  0.2870352
#> Row.2 -1.0443650  1.4728070 -0.5056471
#> Row.3        NaN -0.4742951  0.8499980
#> Row.4  0.4721563  0.6996973 -0.6752246
#> Row.5  0.7463229 -0.1144230  2.2125310
#> 
#> $annotCols
#>       GO_Process  Name
#> Row.1            Row 1
#> Row.2            Row 2
#> Row.3            Row 3
#> Row.4            Row 4
#> Row.5            Row 5
#> 
#> $annotRows
#>          Grouping
#> Column_1   Group1
#> Column_2   Group2
#> Column_3   Group3
#> 
#> $description
#> [1] ""     ""     ""     ""     "Name"
#> 
#> $imputeData
#>   Column_1 Column_2 Column_3
#> 1    False    False    False
#> 2    False    False    False
#> 3    False    False    False
#> 4    False    False    False
#> 5    False    False    False
#> 
#> $qualityData
#>   Column_1 Column_2 Column_3
#> 1        0        0        0
#> 2        0        0        0
#> 3        0        0        0
#> 4        0        0        0
#> 5        0        0        0

As a MatrixData object

The custom MatrixData object is the most faithful representation of a Perseus matrix in R. It is an internal class used by this package to validate whether the fields will be compatible with Perseus in additional workflows.

# Read the same example file as a matrixData object; the class() output below
# shows that the class belongs to the PerseusR package.
dm_out <- read.perseus.as.matrixData(dataFile)
class(dm_out)
#> [1] "matrixData"
#> attr(,"package")
#> [1] "PerseusR"
# Print every slot of the object (output follows).
print(dm_out)
#> An object of class "matrixData"
#> Slot "main":
#>         Column_1   Column_2   Column_3
#> Row.1 -1.8605740 -0.3910594  0.2870352
#> Row.2 -1.0443650  1.4728070 -0.5056471
#> Row.3        NaN -0.4742951  0.8499980
#> Row.4  0.4721563  0.6996973 -0.6752246
#> Row.5  0.7463229 -0.1144230  2.2125310
#> 
#> Slot "annotCols":
#>       GO_Process  Name
#> Row.1            Row 1
#> Row.2            Row 2
#> Row.3            Row 3
#> Row.4            Row 4
#> Row.5            Row 5
#> 
#> Slot "annotRows":
#>          Grouping
#> Column_1   Group1
#> Column_2   Group2
#> Column_3   Group3
#> 
#> Slot "description":
#> [1] ""     ""     ""     ""     "Name"
#> 
#> Slot "imputeData":
#>   Column_1 Column_2 Column_3
#> 1    False    False    False
#> 2    False    False    False
#> 3    False    False    False
#> 4    False    False    False
#> 5    False    False    False
#> 
#> Slot "qualityData":
#>   Column_1 Column_2 Column_3
#> 1        0        0        0
#> 2        0        0        0
#> 3        0        0        0
#> 4        0        0        0
#> 5        0        0        0

The matrixData object contains the slots that are supported by Perseus; they can be accessed through the accessor methods of the object (not all of them need to be present).

# Accessor for the "main" slot.
main(dm_out)
#>         Column_1   Column_2   Column_3
#> Row.1 -1.8605740 -0.3910594  0.2870352
#> Row.2 -1.0443650  1.4728070 -0.5056471
#> Row.3        NaN -0.4742951  0.8499980
#> Row.4  0.4721563  0.6996973 -0.6752246
#> Row.5  0.7463229 -0.1144230  2.2125310
# Accessor for the "annotRows" slot (one row per main column).
annotRows(dm_out)
#>          Grouping
#> Column_1   Group1
#> Column_2   Group2
#> Column_3   Group3
# Accessor for the "annotCols" slot (one row per main row).
annotCols(dm_out)
#>       GO_Process  Name
#> Row.1            Row 1
#> Row.2            Row 2
#> Row.3            Row 3
#> Row.4            Row 4
#> Row.5            Row 5
PerseusR::description(dm_out) # Qualified with the namespace because Biobase also exports description()
#> [1] ""     ""     ""     ""     "Name"

As an expressionSet object (for bioconductor usage)

If you feel more comfortable with the Bioconductor ExpressionSet class, you can also import the data as such.

# Use library() rather than require(): library() stops with an error when
# Biobase is not installed, whereas require() only returns FALSE and lets the
# following calls fail later with a less helpful message.
library(Biobase)
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:parallel':
#> 
#>     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#>     clusterExport, clusterMap, parApply, parCapply, parLapply,
#>     parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     Filter, Find, Map, Position, Reduce, anyDuplicated, append,
#>     as.data.frame, basename, cbind, colMeans, colSums, colnames,
#>     dirname, do.call, duplicated, eval, evalq, get, grep, grepl,
#>     intersect, is.unsorted, lapply, lengths, mapply, match, mget,
#>     order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind,
#>     rowMeans, rowSums, rownames, sapply, setdiff, sort, table,
#>     tapply, union, unique, unsplit, which, which.max, which.min
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> 
#> Attaching package: 'Biobase'
#> The following objects are masked from 'package:PerseusR':
#> 
#>     description, description<-
# Read the same example file as an ExpressionSet; the class() output below
# shows that the class comes from Biobase.
eSet_out <- read.perseus.as.ExpressionSet(dataFile)
class(eSet_out)
#> [1] "ExpressionSet"
#> attr(,"package")
#> [1] "Biobase"
# Print the ExpressionSet summary (output follows).
print(eSet_out)
#> ExpressionSet (storageMode: lockedEnvironment)
#> assayData: 5 features, 3 samples 
#>   element names: exprs 
#> protocolData: none
#> phenoData
#>   sampleNames: Column_1 Column_2 Column_3
#>   varLabels: Grouping
#>   varMetadata: labelDescription
#> featureData
#>   featureNames: Row.1 Row.2 ... Row.5 (5 total)
#>   fvarLabels: GO_Process Name
#>   fvarMetadata: labelDescription
#> experimentData: use 'experimentData(object)'
#> Annotation:     Name

# Use the accessor instead of reaching into the S4 slot directly with `@`.
annotation(eSet_out)
#> [1] ""     ""     ""     ""     "Name"

The equivalences with the Perseus elements are as follows:

The exprs slot in the ExpressionSet objects is equivalent to the main data frame in Perseus.
The featureData is equivalent to the annotation columns (annotCols).
The phenoData is equivalent to the annotation rows (annotRows).
The annotation is equivalent to the description.

# A matrixData object whose annotation sizes match the main data frame
# (2 main columns, 2 annotation rows) passes the check.
df <- matrixData(
  main = data.frame(a = 1:3, b = 6:8),
  annotCols = data.frame(c = c('a','b','c')),
  annotRows = data.frame(x = factor(c('1','1')))
)
MatrixDataCheck(df)
#> [1] TRUE

# With 3 main columns but only 2 annotation rows the constructor itself
# raises an error, which is captured here with try() so it can be printed.
my_error <- try({
  matrixData(
    main = data.frame(a = 1:3, b = 6:8, c = 1:3),
    annotCols = data.frame(c = c('a','b','c')),
    annotRows = data.frame(x = factor(c('1','1')))
  )
})
print(my_error)
#> [1] "Error in MatrixDataCheck.default(main = mainDF, annotationRows = annotationRows, : \n Size of annotation rows not matching: 3 main columns, but 2 annotations\n"
#> attr(,"class")
#> [1] "try-error"
#> attr(,"condition")
#> <simpleError in MatrixDataCheck.default(main = mainDF, annotationRows = annotationRows, annotationCols = annotationCols, descriptions = descriptions, imputeData = imputeData, qualityData = qualityData, all_colnames = all_colnames): Size of annotation rows not matching: 3 main columns, but 2 annotations>

# A plain list with main/annotCols/annotRows elements can be checked the same
# way; here the sizes match (2 main columns, 2 annotation rows).
my_list <- list(
  main = data.frame(a = 1:3, b = 6:8),
  annotCols = data.frame(c = c('a','b','c')),
  annotRows = data.frame(x = factor(c('1','1')))
)
MatrixDataCheck(my_list)
#> [1] TRUE

# With 3 main columns but only 2 annotation rows the check raises an error,
# which is captured here with try() so it can be printed.
my_list <- list(
  main = data.frame(a = 1:3, b = 6:8, c = 1:3),
  annotCols = data.frame(c = c('a','b','c')),
  annotRows = data.frame(x = factor(c('1','1')))
)
my_error <- try({
  MatrixDataCheck(my_list)
})
print(my_error)
#> [1] "Error in MatrixDataCheck.default(main = object$main, annotationRows = object$annotRows, : \n Size of annotation rows not matching: 3 main columns, but 2 annotations\n"
#> attr(,"class")
#> [1] "try-error"
#> attr(,"condition")
#> <simpleError in MatrixDataCheck.default(main = object$main, annotationRows = object$annotRows, annotationCols = object$annotCols, descriptions = object$descriptions, imputeData = object$imputeData, qualityData = object$qualityData, all_colnames = all_colnames): Size of annotation rows not matching: 3 main columns, but 2 annotations>

expressionSet objects

Single-matrix ExpressionSets will usually be compatible, since most of the restrictions on matrixData objects also apply to ExpressionSet objects (regarding the dimensions and classes of the objects in each of the slots).

# Here you can use any type of connection, similar to the base write... functions.
# The usage should be fairly similar to the write.table function.
# tempfile() appends `fileext` verbatim, so the leading dot is required.
tmp.file <- tempfile(fileext = ".txt")
write.perseus(dm_out, tmp.file)
#> NULL

# which would output something like this
cat(readLines(tmp.file), sep = '\n')
#> Column_1 Column_2 Column_3 GO_Process Name
#> #!{Description} Name
#> #!{Type}E E E C T
#> #!{C:Grouping}Group1 Group2 Group3
#> -1.860574 -0.3910594 0.2870352 Row 1
#> -1.044365 1.472807 -0.5056471 Row 2
#> NaN -0.4742951 0.849998 Row 3
#> 0.4721563 0.6996973 -0.6752246 Row 4
#> 0.7463229 -0.114423 2.212531 Row 5

# Write a plain data.frame; per the output below, the numeric columns are
# written as type E and the character column as type C.
my_df <- data.frame(Con1 = 1:3, Con2 = 4:6, An1 = letters[1:3])
# tempfile() appends `fileext` verbatim, so the leading dot is required.
tmp.file <- tempfile(fileext = ".txt")
write.perseus(my_df, con = tmp.file)
#> NULL

cat(readLines(tmp.file), sep = '\n')
#> Con1 Con2 An1
#> #!{Type}E E C
#> 1 4 a
#> 2 5 b
#> 3 6 c

# Write a numeric matrix; per the output below, the row names end up in an
# extra "Names" column.
my_matrix <- matrix(1:10, ncol = 2,
                    dimnames = list(letters[11:15], letters[1:2]))
# tempfile() appends `fileext` verbatim, so the leading dot is required.
tmp.file <- tempfile(fileext = ".txt")
write.perseus(my_matrix, con = tmp.file)
#> NULL

cat(readLines(tmp.file), sep = '\n')
#> a b Names
#> #!{Type}E E C
#> 1 6 k
#> 2 7 l
#> 3 8 m
#> 4 9 n
#> 5 10 o

# Write a minimal ExpressionSet built from a bare matrix.
eSet <- Biobase::ExpressionSet(matrix(1:10, ncol = 2))
# tempfile() appends `fileext` verbatim, so the leading dot is required.
tmp.file <- tempfile(fileext = ".txt")
write.perseus(eSet, con = tmp.file)
#> NULL

cat(readLines(tmp.file), sep = '\n')
#> X1 X2
#> #!{Type}E E
#> 1 6
#> 2 7
#> 3 8
#> 4 9
#> 5 10

# Adding elements to the output: supply annotation rows and columns
# explicitly when writing a matrix.
my_matrix <- matrix(1:10, ncol = 2,
                    dimnames = list(letters[11:15], letters[1:2]))
my_annotations_rows <- data.frame(My_Names = letters[1:2])
my_annotations_cols <- data.frame(My_Genes = letters[11:15])
# tempfile() appends `fileext` verbatim, so the leading dot is required.
tmp.file <- tempfile(fileext = ".txt")
write.perseus(my_matrix, con = tmp.file,
              annotCols = my_annotations_cols,
              annotRows = my_annotations_rows)
#> NULL

cat(readLines(tmp.file), sep = '\n')
#> a b My_Genes
#> #!{Type}E E C
#> #!{C:My_Names}a b
#> 1 6 k
#> 2 7 l
#> 3 8 m
#> 4 9 n
#> 5 10 o

# Write a matrix while passing annotCols = NULL explicitly.
# NOTE(review): the recorded output below still contains a "Names" column;
# confirm whether that is the intended result of annotCols = NULL.
my_matrix <- matrix(1:10, ncol = 2,
                    dimnames = list(letters[11:15], letters[1:2]))
# tempfile() appends `fileext` verbatim, so the leading dot is required.
tmp.file <- tempfile(fileext = ".txt")
write.perseus(my_matrix, con = tmp.file, annotCols = NULL)
#> NULL

cat(readLines(tmp.file), sep = '\n')
#> a b Names
#> #!{Type}E E C
#> 1 6 k
#> 2 7 l
#> 3 8 m
#> 4 9 n
#> 5 10 o

Using Perseus data in R

PerseusR Team

2018-11-05

Reading Perseus data in R

As a list

As a MatrixData object

As an expressionSet object (for bioconductor usage)

Checking Data compatibility in R

MatrixData

lists

expressionSet objects

Writing Data into Perseus-compatible text representations

MatrixData

data.frame

matrix

list

expressionSet objects

Adding elements to the output

Removing elements from the output