Skip to content
library(MOSuite)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
options(moo_print_plots = TRUE)

moo_nidap <- create_multiOmicDataSet_from_dataframes(
  sample_metadata = as.data.frame(nidap_sample_metadata),
  counts_dat = as.data.frame(nidap_raw_counts)
) %>%
  clean_raw_counts() %>%
  filter_counts(group_colname = "Group") %>%
  normalize_counts(group_colname = "Group") %>%
  batch_correct_counts(
    covariates_colname = "Group",
    batch_colname = "Batch",
    label_colname = "Label"
  ) %>%
  diff_counts(
    count_type = "filt",
    covariates_colnames = c("Group", "Batch"),
    contrast_colname = c("Group"),
    contrasts = c("B-A", "C-A", "B-C"),
    input_in_log_counts = FALSE,
    return_mean_and_sd = FALSE,
    voom_normalization_method = "quantile",
  )

#> * cleaning raw counts
#> Not able to identify multiple id's in GeneName
#> Columns that can be used to aggregate gene information GeneName
#> Aggregating the counts for the same ID in different chromosome locations.
#> Column used to Aggregate duplicate IDs: GeneName
#> Number of rows before Collapse: 43280
#> no duplicated IDs in GeneName
#> * filtering clean counts
#> Number of features after filtering: 7943

#> * normalizing filt counts
#> Total number of features included: 7943
#> Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps

#> Sample columns: A1, Sample columns: A2, Sample columns: A3, Sample columns: B1, Sample columns: B2, Sample columns: B3, Sample columns: C1, Sample columns: C2, Sample columns: C3
#> * batch-correcting norm-voom counts
#> Found2batches
#> Adjusting for2covariate(s) or covariate level(s)
#> Standardizing Data across genes
#> Fitting L/S model and finding priors
#> Finding parametric adjustments
#> Adjusting the Data

#> The total number of features in output: 7943
#> Number of samples after batch correction: 10
#> * differential counts
#> Setting first column of `counts` as gene annotation.
#> Total number of genes included: 7943
#> `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'


moo_nidap@analyses$diff %>%
  join_dfs() %>%
  head()
#> Joining with `by = join_by(GeneName)`
#> Joining with `by = join_by(GeneName)`
#>   GeneName   B-A_FC  B-A_logFC B-A_tstat  B-A_pval B-A_adjpval    C-A_FC
#> 1   Mrpl15 1.059250 0.08304265 0.2377167 0.8162052   0.9682636 -1.068725
#> 2   Lypla1 1.370909 0.45513310 1.1301522 0.2810081   0.7797612 -1.066981
#> 3    Tcea1 1.083699 0.11596469 0.3657617 0.7210585   0.9500109 -1.177051
#> 4  Atp6v1h 1.311199 0.39088683 1.1241780 0.2834330   0.7825194 -1.221374
#> 5   Rb1cc1 1.514888 0.59921070 1.3095182 0.2154459   0.7187843  1.313927
#> 6   Pcmtd1 1.112738 0.15411405 0.2497788 0.8070821   0.9663212  3.238362
#>     C-A_logFC  C-A_tstat    C-A_pval C-A_adjpval    B-C_FC  B-C_logFC
#> 1 -0.09589040 -0.2897955 0.777035116  0.89204033  1.132046  0.1789331
#> 2 -0.09353458 -0.2379598 0.816021040  0.91228196  1.462734  0.5486677
#> 3 -0.23517638 -0.7602905 0.462100568  0.69160897  1.275569  0.3511411
#> 4 -0.28850521 -0.8324070 0.421815393  0.65837683  1.601465  0.6793920
#> 5  0.39388567  0.9073581 0.382492765  0.62547476  1.152946  0.2053250
#> 6  1.69526432  3.4021010 0.005417489  0.05722644 -2.910264 -1.5411503
#>    B-C_tstat   B-C_pval B-C_adjpval
#> 1  0.5285947 0.60695058   0.8355016
#> 2  1.3910079 0.19006353   0.4871490
#> 3  1.1281746 0.28180907   0.5893629
#> 4  1.9865449 0.07085778   0.3168579
#> 5  0.4843119 0.63708764   0.8518020
#> 6 -2.9536960 0.01233497   0.1411803