RSEM counts from RENEE • MOSuite

library(MOSuite)
#> Warning: replacing previous import 'S4Arrays::makeNindexFromArrayViewport' by
#> 'DelayedArray::makeNindexFromArrayViewport' when loading 'SummarizedExperiment'
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

RENEE dataset

# Locate the example files that ship with MOSuite.
# Replace these lines with the actual paths to your files.
# `mustWork = TRUE` turns a missing file into an immediate error; by default
# system.file() silently returns "" when nothing is found.
gene_counts_tsv <- system.file(
  "extdata", "RSEM.genes.expected_count.all_samples.txt.gz",
  package = "MOSuite",
  mustWork = TRUE
)
metadata_tsv <- system.file(
  "extdata", "sample_metadata.tsv.gz",
  package = "MOSuite",
  mustWork = TRUE
)

# create multi-omic object from the sample metadata and gene counts files
moo <- create_multiOmicDataSet_from_files(
  sample_meta_filepath = metadata_tsv,
  feature_counts_filepath = gene_counts_tsv
)
#> Rows: 58929 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (2): gene_id, GeneName
#> dbl (4): KO_S3, KO_S4, WT_S1, WT_S2
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Rows: 4 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (2): sample_id, condition
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# preview the first rows of the raw counts table
moo@counts$raw |> head()
#> # A tibble: 6 × 5
#>   gene_id            KO_S3 KO_S4 WT_S1 WT_S2
#>   <chr>              <dbl> <dbl> <dbl> <dbl>
#> 1 ENSG00000121410.11     0     0     0     0
#> 2 ENSG00000268895.5      0     0     0     0
#> 3 ENSG00000148584.15     0     0     0     0
#> 4 ENSG00000175899.14     0     0     0     0
#> 5 ENSG00000245105.3      0     0     0     0
#> 6 ENSG00000166535.20     0     0     0     0
# preview the sample metadata table
moo@sample_meta |> head()
#> # A tibble: 4 × 2
#>   sample_id condition
#>   <chr>     <chr>    
#> 1 KO_S3     knockout 
#> 2 KO_S4     knockout 
#> 3 WT_S1     wildtype 
#> 4 WT_S2     wildtype
# preview the feature annotation table
moo@annotation |> head()
#> # A tibble: 6 × 2
#>   gene_id            GeneName
#>   <chr>              <chr>   
#> 1 ENSG00000121410.11 A1BG    
#> 2 ENSG00000268895.5  A1BG-AS1
#> 3 ENSG00000148584.15 A1CF    
#> 4 ENSG00000175899.14 A2M     
#> 5 ENSG00000245105.3  A2M-AS1 
#> 6 ENSG00000166535.20 A2ML1

# Run the analysis steps in sequence with the native pipe; the result of the
# whole chain is assigned back to `moo`.
moo <- moo |>
  clean_raw_counts() |>
  # filtering thresholds are spelled out by the argument names below
  filter_counts(
    group_colname = "condition",
    label_colname = "sample_id",
    minimum_count_value_to_be_considered_nonzero = 1,
    minimum_number_of_samples_with_nonzero_counts_in_total = 1,
    minimum_number_of_samples_with_nonzero_counts_in_a_group = 1
  ) |>
  normalize_counts(
    group_colname = "condition",
    label_colname = "sample_id"
  ) |>
  # contrast is written as "<level>-<level>" using values of `condition`
  diff_counts(
    covariates_colnames = "condition",
    contrast_colname = "condition",
    contrasts = c("knockout-wildtype")
  ) |>
  # select features by adjusted p-value and log fold-change cutoffs
  filter_diff(
    significance_cutoff = 0.05,
    significance_column = "adjpval",
    change_column = "logFC",
    change_cutoff = 1
  )
#> Saving 7.29 x 4.51 in image
#> * cleaning raw counts
#> 
#> Not able to identify multiple id's in gene_id
#> 
#> Columns that can be used to aggregate gene information gene_id
#> 
#> Aggregating the counts for the same ID in different chromosome locations.
#> Column used to Aggregate duplicate IDs: gene_id
#> Number of rows before Collapse: 58929
#> 
#> no duplicated IDs in gene_id
#> 
#> * filtering clean counts
#> 
#> Number of features after filtering: 291
#> 
#> colors_for_plots NULL
#> 
#> colors_for_plots character
#> 
#> Saving 7.29 x 4.51 in image
#> Saving 7.29 x 4.51 in image
#> * normalizing filt counts
#> 
#> Total number of features included: 291
#> 
#> Saving 7.29 x 4.51 in image
#> Saving 7.29 x 4.51 in image
#> Sample columns: KO_S3, Sample columns: KO_S4, Sample columns: WT_S1, Sample columns: WT_S2
#> 
#> * differential counts
#> 
#> Setting first column of `counts` as gene annotation.
#> 
#> Total number of genes included: 291
#> 
#> Saving 7.29 x 4.51 in image
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> * filtering differential features
#> 
#> Total number of genes selected with adjpval < 0.05 and | logFC | ≥ 1 is sum(selgenes)
#> 
#> Saving 7.29 x 4.51 in image

# preview the first rows of the voom-normalized counts
moo@counts$norm$voom |>
  head()
#>              gene_id     KO_S3     KO_S4     WT_S1     WT_S2
#> 1  ENSG00000215458.8 11.075196 12.348091  8.816153 10.004874
#> 2 ENSG00000160179.18  9.608634 12.770317 12.348091 12.236996
#> 3  ENSG00000258017.1  9.608634  8.816153  8.816153  8.816153
#> 4  ENSG00000282393.1  8.816153  9.608634  8.816153  8.816153
#> 5  ENSG00000286104.1  9.608634  8.816153  8.816153  8.816153
#> 6  ENSG00000274422.1  8.816153  9.608634  8.816153  8.816153

The multiOmicDataSet object structure

# show the slots of the object and what each one contains
moo |> str()
#> <MOSuite::multiOmicDataSet>
#>  @ sample_meta: spc_tbl_ [4 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
#>  $ sample_id: chr [1:4] "KO_S3" "KO_S4" "WT_S1" "WT_S2"
#>  $ condition: chr [1:4] "knockout" "knockout" "wildtype" "wildtype"
#>  - attr(*, "spec")=
#>   .. cols(
#>   ..   sample_id = col_character(),
#>   ..   condition = col_character()
#>   .. )
#>  - attr(*, "problems")=<externalptr> 
#>  @ annotation : tibble [58,929 × 2] (S3: tbl_df/tbl/data.frame)
#>  $ gene_id : chr [1:58929] "ENSG00000121410.11" "ENSG00000268895.5" "ENSG00000148584.15" "ENSG00000175899.14" ...
#>  $ GeneName: chr [1:58929] "A1BG" "A1BG-AS1" "A1CF" "A2M" ...
#>  @ counts     :List of 4
#>  .. $ raw  : tibble [58,929 × 5] (S3: tbl_df/tbl/data.frame)
#>  ..  ..$ gene_id: chr [1:58929] "ENSG00000121410.11" "ENSG00000268895.5" "ENSG00000148584.15" "ENSG00000175899.14" ...
#>  ..  ..$ KO_S3  : num [1:58929] 0 0 0 0 0 0 0 0 0 0 ...
#>  ..  ..$ KO_S4  : num [1:58929] 0 0 0 0 0 0 0 0 0 0 ...
#>  ..  ..$ WT_S1  : num [1:58929] 0 0 0 0 0 0 0 0 0 0 ...
#>  ..  ..$ WT_S2  : num [1:58929] 0 0 0 0 0 0 0 0 0 0 ...
#>  .. $ clean:'data.frame':    58929 obs. of  5 variables:
#>  ..  ..$ gene_id: chr [1:58929] "ENSG00000121410.11" "ENSG00000268895.5" "ENSG00000148584.15" "ENSG00000175899.14" ...
#>  ..  ..$ KO_S3  : num [1:58929] 0 0 0 0 0 0 0 0 0 0 ...
#>  ..  ..$ KO_S4  : num [1:58929] 0 0 0 0 0 0 0 0 0 0 ...
#>  ..  ..$ WT_S1  : num [1:58929] 0 0 0 0 0 0 0 0 0 0 ...
#>  ..  ..$ WT_S2  : num [1:58929] 0 0 0 0 0 0 0 0 0 0 ...
#>  .. $ filt :'data.frame':    291 obs. of  5 variables:
#>  ..  ..$ gene_id: chr [1:291] "ENSG00000215458.8" "ENSG00000160179.18" "ENSG00000258017.1" "ENSG00000282393.1" ...
#>  ..  ..$ KO_S3  : num [1:291] 2 1 1 0 1 0 0 0 3 0 ...
#>  ..  ..$ KO_S4  : num [1:291] 4 5 0 1 0 1 0 0 3 0 ...
#>  ..  ..$ WT_S1  : num [1:291] 0 6 0 0 0 0 46 33 9 0 ...
#>  ..  ..$ WT_S2  : num [1:291] 1 7 0 0 0 0 80 31 11 1 ...
#>  .. $ norm :List of 1
#>  ..  ..$ voom:'data.frame':  291 obs. of  5 variables:
#>  ..  .. ..$ gene_id: chr [1:291] "ENSG00000215458.8" "ENSG00000160179.18" "ENSG00000258017.1" "ENSG00000282393.1" ...
#>  ..  .. ..$ KO_S3  : num [1:291] 11.08 9.61 9.61 8.82 9.61 ...
#>  ..  .. ..$ KO_S4  : num [1:291] 12.35 12.77 8.82 9.61 8.82 ...
#>  ..  .. ..$ WT_S1  : num [1:291] 8.82 12.35 8.82 8.82 8.82 ...
#>  ..  .. ..$ WT_S2  : num [1:291] 10 12.24 8.82 8.82 8.82 ...
#>  @ analyses   :List of 3
#>  .. $ colors   :List of 2
#>  ..  ..$ sample_id: Named chr [1:4] "#000000" "#E69F00" "#56B4E9" "#009E73"
#>  ..  .. ..- attr(*, "names")= chr [1:4] "KO_S3" "KO_S4" "WT_S1" "WT_S2"
#>  ..  ..$ condition: Named chr [1:2] "#000000" "#E69F00"
#>  ..  .. ..- attr(*, "names")= chr [1:2] "knockout" "wildtype"
#>  .. $ diff     :List of 1
#>  ..  ..$ knockout-wildtype:'data.frame': 291 obs. of  6 variables:
#>  ..  .. ..$ gene_id: chr [1:291] "ENSG00000215458.8" "ENSG00000160179.18" "ENSG00000258017.1" "ENSG00000282393.1" ...
#>  ..  .. ..$ FC     : num [1:291] 4.69 -2.13 1.32 1.32 1.32 ...
#>  ..  .. ..$ logFC  : num [1:291] 2.23 -1.09 0.396 0.396 0.396 ...
#>  ..  .. ..$ tstat  : num [1:291] 2.956 -0.957 0.557 0.557 0.557 ...
#>  ..  .. ..$ pval   : num [1:291] 0.0648 0.4135 0.6188 0.6188 0.6188 ...
#>  ..  .. ..$ adjpval: num [1:291] 0.185 0.658 0.69 0.69 0.69 ...
#>  .. $ diff_filt:'data.frame':    54 obs. of  6 variables:
#>  ..  ..$ gene_id                  : chr [1:54] "ENSG00000154734.15" "ENSG00000154736.6" "ENSG00000232855.6" "ENSG00000231324.1" ...
#>  ..  ..$ knockout-wildtype_FC     : num [1:54] -54.2 -35.5 -7.17 -2.77 6.7 -6.39 -27.8 -5.62 6.25 6.7 ...
#>  ..  ..$ knockout-wildtype_logFC  : num [1:54] -5.76 -5.15 -2.84 -1.47 2.74 -2.68 -4.8 -2.49 2.64 2.74 ...
#>  ..  ..$ knockout-wildtype_tstat  : num [1:54] -103 -31.1 -14.8 -6.67 8.72 -13.4 -14.4 -11.1 6.94 8.72 ...
#>  ..  ..$ knockout-wildtype_pval   : num [1:54] 4.16e-06 1.19e-04 9.54e-04 8.40e-03 4.07e-03 1.26e-03 1.02e-03 2.09e-03 7.57e-03 4.07e-03 ...
#>  ..  ..$ knockout-wildtype_adjpval: num [1:54] 0.000695 0.00493 0.0139 0.0479 0.0296 0.0146 0.014 0.0194 0.0468 0.0296 ...