library(MOSuite)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example inputs bundled with the MOSuite package.
# Replace these lines with the actual paths to your own files.
# `mustWork = TRUE` makes system.file() raise an error when no matching file
# is found, instead of silently returning "" and deferring the failure to
# whatever consumes the path.
gene_counts_tsv <- system.file(
  "extdata", "RSEM.genes.expected_count.all_samples.txt.gz",
  package = "MOSuite",
  mustWork = TRUE
)
metadata_tsv <- system.file(
  "extdata", "sample_metadata.tsv.gz",
  package = "MOSuite",
  mustWork = TRUE
)
# Build the multi-omic object from the two input files: the sample metadata
# table and the gene counts table.
moo <- create_multiOmicDataSet_from_files(
  sample_meta_filepath = metadata_tsv,
  gene_counts_filepath = gene_counts_tsv
)
#> Rows: 58929 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (2): gene_id, GeneName
#> dbl (4): KO_S3, KO_S4, WT_S1, WT_S2
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Rows: 4 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (2): sample_id, condition
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Filter the counts held in `moo`, using "condition" as the group column and
# "sample_id" as the label column. All three thresholds are set to 1.
moo <- moo %>%
  filter_counts(
    group_column = "condition",
    label_column = "sample_id",
    columns_to_include = c("gene_id", "KO_S3", "KO_S4", "WT_S1", "WT_S2"),
    minimum_count_value_to_be_considered_nonzero = 1,
    minimum_number_of_samples_with_nonzero_counts_in_total = 1,
    # Last argument: keep it free of a trailing comma, which R would treat as
    # an additional empty argument to the call.
    minimum_number_of_samples_with_nonzero_counts_in_a_group = 1
  )
# Preview the first rows of the filtered counts table.
moo@counts$filt %>% head()
#> gene_id KO_S3 KO_S4 WT_S1 WT_S2
#> 1 ENSG00000072803.17|FBXW11 2 0 0 0
#> 2 ENSG00000083845.9|RPS5 1 0 0 0
#> 3 ENSG00000107371.13|EXOSC3 1 1 0 0
#> 4 ENSG00000111639.8|MRPL51 0 1 0 0
#> 5 ENSG00000111640.15|GAPDH 0 1 0 0
#> 6 ENSG00000111786.9|SRSF9 0 1 0 0