PerseusR provides an interface to use perseus data in R and vice versa
The most basic way to import data from perseus into r is as a list whose elements can then be used in any sort of workflow.
library(PerseusR)
dataFolder <- system.file('extdata', package = 'PerseusR')
dataFiles <- list.files(dataFolder,
pattern = "matrix[[:digit:]]*.txt",
full.names=TRUE)
dataFile <- dataFiles[[1]]
default_output <- read.perseus.default(dataFile)
class(default_output)
#> [1] "list"
print(default_output)
#> $main
#> Column_1 Column_2 Column_3
#> Row.1 -1.8605740 -0.3910594 0.2870352
#> Row.2 -1.0443650 1.4728070 -0.5056471
#> Row.3 NaN -0.4742951 0.8499980
#> Row.4 0.4721563 0.6996973 -0.6752246
#> Row.5 0.7463229 -0.1144230 2.2125310
#>
#> $annotCols
#> GO_Process Name
#> Row.1 Row 1
#> Row.2 Row 2
#> Row.3 Row 3
#> Row.4 Row 4
#> Row.5 Row 5
#>
#> $annotRows
#> Grouping
#> Column_1 Group1
#> Column_2 Group2
#> Column_3 Group3
#>
#> $description
#> [1] "" "" "" "" "Name"
#>
#> $imputeData
#> Column_1 Column_2 Column_3
#> 1 False False False
#> 2 False False False
#> 3 False False False
#> 4 False False False
#> 5 False False False
#>
#> $qualityData
#> Column_1 Column_2 Column_3
#> 1 0 0 0
#> 2 0 0 0
#> 3 0 0 0
#> 4 0 0 0
#> 5 0 0 0
The custom MatrixData
object is the most faithful representation of a Perseus matrix in R and is an internal class used by this package to validate whether the fields will be compatible with Perseus in additional workflows.
dm_out <- read.perseus.as.matrixData(dataFile)
class(dm_out)
#> [1] "matrixData"
#> attr(,"package")
#> [1] "PerseusR"
print(dm_out)
#> An object of class "matrixData"
#> Slot "main":
#> Column_1 Column_2 Column_3
#> Row.1 -1.8605740 -0.3910594 0.2870352
#> Row.2 -1.0443650 1.4728070 -0.5056471
#> Row.3 NaN -0.4742951 0.8499980
#> Row.4 0.4721563 0.6996973 -0.6752246
#> Row.5 0.7463229 -0.1144230 2.2125310
#>
#> Slot "annotCols":
#> GO_Process Name
#> Row.1 Row 1
#> Row.2 Row 2
#> Row.3 Row 3
#> Row.4 Row 4
#> Row.5 Row 5
#>
#> Slot "annotRows":
#> Grouping
#> Column_1 Group1
#> Column_2 Group2
#> Column_3 Group3
#>
#> Slot "description":
#> [1] "" "" "" "" "Name"
#>
#> Slot "imputeData":
#> Column_1 Column_2 Column_3
#> 1 False False False
#> 2 False False False
#> 3 False False False
#> 4 False False False
#> 5 False False False
#>
#> Slot "qualityData":
#> Column_1 Column_2 Column_3
#> 1 0 0 0
#> 2 0 0 0
#> 3 0 0 0
#> 4 0 0 0
#> 5 0 0 0
The data matrix object contains the slots that are supported by perseus and can be accessed by the methods of the object (not all of them must be present)
main(dm_out)
#> Column_1 Column_2 Column_3
#> Row.1 -1.8605740 -0.3910594 0.2870352
#> Row.2 -1.0443650 1.4728070 -0.5056471
#> Row.3 NaN -0.4742951 0.8499980
#> Row.4 0.4721563 0.6996973 -0.6752246
#> Row.5 0.7463229 -0.1144230 2.2125310
annotRows(dm_out)
#> Grouping
#> Column_1 Group1
#> Column_2 Group2
#> Column_3 Group3
annotCols(dm_out)
#> GO_Process Name
#> Row.1 Row 1
#> Row.2 Row 2
#> Row.3 Row 3
#> Row.4 Row 4
#> Row.5 Row 5
PerseusR::description(dm_out) # Biobase has a descr function as well...
#> [1] "" "" "" "" "Name"
If you feel more comfortable with the Bioconductor ExpressionSet class, you can also import the data as such.
require(Biobase)
#> Loading required package: Biobase
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:parallel':
#>
#> clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#> clusterExport, clusterMap, parApply, parCapply, parLapply,
#> parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> Filter, Find, Map, Position, Reduce, anyDuplicated, append,
#> as.data.frame, basename, cbind, colMeans, colSums, colnames,
#> dirname, do.call, duplicated, eval, evalq, get, grep, grepl,
#> intersect, is.unsorted, lapply, lengths, mapply, match, mget,
#> order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind,
#> rowMeans, rowSums, rownames, sapply, setdiff, sort, table,
#> tapply, union, unique, unsplit, which, which.max, which.min
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#>
#> Attaching package: 'Biobase'
#> The following objects are masked from 'package:PerseusR':
#>
#> description, description<-
eSet_out <- read.perseus.as.ExpressionSet(dataFile)
class(eSet_out)
#> [1] "ExpressionSet"
#> attr(,"package")
#> [1] "Biobase"
print(eSet_out)
#> ExpressionSet (storageMode: lockedEnvironment)
#> assayData: 5 features, 3 samples
#> element names: exprs
#> protocolData: none
#> phenoData
#> sampleNames: Column_1 Column_2 Column_3
#> varLabels: Grouping
#> varMetadata: labelDescription
#> featureData
#> featureNames: Row.1 Row.2 ... Row.5 (5 total)
#> fvarLabels: GO_Process Name
#> fvarMetadata: labelDescription
#> experimentData: use 'experimentData(object)'
#> Annotation: Name
eSet_out@annotation
#> [1] "" "" "" "" "Name"
The equivalences between the ExpressionSet slots and the Perseus fields are as follows:
- The `exprs` slot in the ExpressionSet object is equivalent to the `main` data frame in Perseus.
- `featureData` is equivalent to the `annotationCols`.
- `phenoData` is equivalent to the `annotationRows`.
- `Annotation` is equivalent to the `descr`.
There are a series of functions that check the compatibility of R objects with Perseus.
For matrixData objects, MatrixDataCheck does not provide much additional insight, because the check is already run when the object is constructed; therefore, if the data is not compatible, an error is returned when trying to create the object.
df <- matrixData(
main = data.frame(a = 1:3, b = 6:8),
annotCols = data.frame(c = c('a','b','c')),
annotRows = data.frame(x = factor(c('1','1'))))
MatrixDataCheck(df)
#> [1] TRUE
my_error <- try({
matrixData(
main = data.frame(a = 1:3, b = 6:8, c = 1:3),
annotCols = data.frame(c = c('a','b','c')),
annotRows = data.frame(x = factor(c('1','1'))))
})
print(my_error)
#> [1] "Error in MatrixDataCheck.default(main = mainDF, annotationRows = annotationRows, : \n Size of annotation rows not matching: 3 main columns, but 2 annotations\n"
#> attr(,"class")
#> [1] "try-error"
#> attr(,"condition")
#> <simpleError in MatrixDataCheck.default(main = mainDF, annotationRows = annotationRows, annotationCols = annotationCols, descriptions = descriptions, imputeData = imputeData, qualityData = qualityData, all_colnames = all_colnames): Size of annotation rows not matching: 3 main columns, but 2 annotations>
For a plain list, on the other hand, this function is useful, since the object is not inherently compatible.
my_list <- list(main = data.frame(a = 1:3, b = 6:8),
annotCols = data.frame(c = c('a','b','c')),
annotRows = data.frame(x = factor(c('1','1'))))
MatrixDataCheck(my_list)
#> [1] TRUE
my_list <- list(main = data.frame(a = 1:3, b = 6:8, c = 1:3),
annotCols = data.frame(c = c('a','b','c')),
annotRows = data.frame(x = factor(c('1','1'))))
my_error <- try({
MatrixDataCheck(my_list)
})
print(my_error)
#> [1] "Error in MatrixDataCheck.default(main = object$main, annotationRows = object$annotRows, : \n Size of annotation rows not matching: 3 main columns, but 2 annotations\n"
#> attr(,"class")
#> [1] "try-error"
#> attr(,"condition")
#> <simpleError in MatrixDataCheck.default(main = object$main, annotationRows = object$annotRows, annotationCols = object$annotCols, descriptions = object$descriptions, imputeData = object$imputeData, qualityData = object$qualityData, all_colnames = all_colnames): Size of annotation rows not matching: 3 main columns, but 2 annotations>
Single-matrix ExpressionSets will usually be compatible, since most of the restrictions on matrixData objects also apply to ExpressionSet objects (regarding the dimensions and classes of the objects in each of the slots).
This is the way in which perseus will read the data again so here are a couple of examples.
# Here you can use any type of connection, similar to the base write... functions
# The usage should be fairly similar to the write.table function
tmp.file <- tempfile(fileext="txt")
write.perseus(dm_out, tmp.file)
#> NULL
# which would output something like this
cat(readLines(tmp.file), sep = '\n')
#> Column_1 Column_2 Column_3 GO_Process Name
#> #!{Description} Name
#> #!{Type}E E E C T
#> #!{C:Grouping}Group1 Group2 Group3
#> -1.860574 -0.3910594 0.2870352 Row 1
#> -1.044365 1.472807 -0.5056471 Row 2
#> NaN -0.4742951 0.849998 Row 3
#> 0.4721563 0.6996973 -0.6752246 Row 4
#> 0.7463229 -0.114423 2.212531 Row 5
Data frames are converted in such a manner that numeric columns are transferred as the main DF and non-numeric columns as the annotation cols of the Perseus DF.
Numeric matrices can be output as well :D
Lists are a little trickier; currently the function looks for named elements that match the arguments, so it would use the elements named `main`, `annotCols`, `annotRows` and `descr`.
my_list <- list(main = data.frame(A = 1:5, B = 6:10),
annotRows = data.frame(is_control = c(TRUE, FALSE)),
annotCols = data.frame(Names = letters[1:5]),
descr = c('something',
'something else',
'yet another thing'))
tmp.file <- tempfile(fileext="txt")
write.perseus(my_list, con = tmp.file)
#> NULL
cat(readLines(tmp.file), sep = '\n')
#> A B Names
#> #!{Description}something something else yet another thing
#> #!{Type}E E C
#> #!{C:is_control}TRUE FALSE
#> 1 6 a
#> 2 7 b
#> 3 8 c
#> 4 9 d
#> 5 10 e
Since not all data types support all the elements, one can specify them to the function and they will be passed over to the output.
my_matrix <- matrix(1:10, ncol = 2,
dimnames = list(letters[11:15], letters[1:2]))
my_annotations_rows <- data.frame(My_Names = letters[1:2])
my_annotations_cols <- data.frame(My_Genes = letters[11:15])
tmp.file <- tempfile(fileext="txt")
write.perseus(my_matrix, con = tmp.file,
annotCols = my_annotations_cols,
annotRows = my_annotations_rows)
#> NULL
cat(readLines(tmp.file), sep = '\n')
#> a b My_Genes
#> #!{Type}E E C
#> #!{C:My_Names}a b
#> 1 6 k
#> 2 7 l
#> 3 8 m
#> 4 9 n
#> 5 10 o
Elements can be set to `NULL` to remove that section from the output.