diff --git a/Project.toml b/Project.toml
index 06256eb..5f8a3e8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,12 +4,13 @@ keywords = ["microbiology", "microbiome", "biology", "metagenomics"]
 license = "MIT"
 desc = "Convenience functions for working with the bioBakery"
 authors = ["kescobo ", "annelle-abatoni ", "anikaluo ", "Vanja Klepac-Ceraj "]
-version = "0.5.6"
+version = "0.6"
 
 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
 Microbiome = "3bd8f0ae-a0f2-5238-a5af-e1b399a4940c"
+ReTest = "e0db7c4e-2690-44b9-bad6-7687da720f89"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
@@ -17,7 +18,8 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 [compat]
 CSV = "0.8, 0.9, 0.10"
 Conda = "1.5"
-Microbiome = "0.8, 0.9"
+Microbiome = "0.9"
 Reexport = "0.2, 1"
+ReTest = "0.3"
 Tables = "1.2.1"
 julia = "1.6"
diff --git a/src/BiobakeryUtils.jl b/src/BiobakeryUtils.jl
index b8761e0..0b43070 100644
--- a/src/BiobakeryUtils.jl
+++ b/src/BiobakeryUtils.jl
@@ -32,6 +32,7 @@ using CSV
 using Tables
 using SparseArrays
 using Conda
+using ReTest
 
 include("utils.jl")
 include("metaphlan.jl")
diff --git a/test/BiobakeryUtilsTests.jl b/test/BiobakeryUtilsTests.jl
new file mode 100644
index 0000000..bbd3024
--- /dev/null
+++ b/test/BiobakeryUtilsTests.jl
@@ -0,0 +1,135 @@
+module BiobakeryUtilsTests
+
+# ReTest-based test suite for BiobakeryUtils. `test/runtests.jl` includes this
+# file and calls `BiobakeryUtilsTests.runtests()`.
+
+using Random
+using ReTest
+using BiobakeryUtils
+using BiobakeryUtils.Conda
+using SparseArrays
+using DelimitedFiles
+using CSV
+
+# Run `install_deps()` when the package's Conda bin directory does not exist yet,
+# then append that directory to PATH (presumably so the CLI testsets can find the
+# bioBakery executables).
+isdir(Conda.bin_dir(:BiobakeryUtils)) || BiobakeryUtils.install_deps()
+ENV["PATH"] = ENV["PATH"] * ":" * Conda.bin_dir(:BiobakeryUtils)
+
+@testset "CLI" begin
+    @testset "Utilities" begin
+        cmd = ["thing", "foo_bar"]
+        cmd2 = copy(cmd)
+
+        BiobakeryUtils.add_cli_kwargs!(cmd, Dict(:some_thing=> "foo", :bool=> true))
+        @test all(cmd .== ["thing", "foo_bar", "--some_thing", "foo", "--bool"])
+        BiobakeryUtils.add_cli_kwargs!(cmd2, Dict(:some_thing=> "foo", :bool=> true); optunderscores=false)
+        @test all(cmd2 .== ["thing", "foo_bar", "--some-thing", "foo", "--bool"])
+    end
+
+    @testset "Metaphlan" begin
+        @test BiobakeryUtils.check_for_install("metaphlan") |> isnothing
+        @test BiobakeryUtils.check_for_install("merge_metaphlan_tables.py") |> isnothing
+
+        @test metaphlan("", ""; help=true).exitcode == 0
+
+        # The merged table is written into the fixture directory on purpose: the
+        # top-level "Metaphlan" testset below loads a file from this same path.
+        profiles = filter(f-> contains(f, "_profile.tsv"), readdir(joinpath(@__DIR__, "files/metaphlan"), join=true))
+        @test metaphlan_merge(profiles, joinpath(@__DIR__, "files/metaphlan/merged_abundance_table.tsv")).exitcode == 0
+    end
+
+    @testset "Humann" begin
+        @test BiobakeryUtils.check_for_install("humann") |> isnothing
+        @test BiobakeryUtils.check_for_install("humann_rename_table") |> isnothing
+        @test BiobakeryUtils.check_for_install("humann_renorm_table") |> isnothing
+        @test BiobakeryUtils.check_for_install("humann_join_tables") |> isnothing
+        @test humann("", ""; help=true).exitcode == 0
+    end
+end
+
+@testset "Metaphlan" begin
+    profile_1 = metaphlan_profile(joinpath(@__DIR__, "files/metaphlan/SRS014464-Anterior_nares_profile.tsv"); sample="SRS014464")
+    @test profile_1["k__Bacteria", "SRS014464"] == 100.0
+    @test profile_1["o__Pseudomonadales", "SRS014464"] == 97.28734
+    @test size(profile_1) == (13, 1)
+    profile_2 = metaphlan_profile(joinpath(@__DIR__, "files/metaphlan/SRS014459-Stool_profile.tsv"), 3)
+    @test size(profile_2) == (2, 1)
+    @test profile_2["p__Firmicutes", "SRS014459-Stool_profile"] == 68.90167
+    profile_3 = metaphlan_profile(joinpath(@__DIR__, "files/metaphlan/SRS014464-Anterior_nares_profile.tsv"), :phylum)
+    @test size(profile_3) == (2, 1)
+    @test profile_3["p__Proteobacteria", 1] == 97.28734
+
+    merge_profile_1 = metaphlan_profiles(joinpath(@__DIR__, "files/metaphlan/merged_abundance_table.tsv"); samplestart=3)
+    @test size(merge_profile_1) == (62, 6)
+    @test merge_profile_1["g__Moraxella", 5] == 97.28734
+    merge_profile_2 = metaphlan_profiles(joinpath(@__DIR__, "files/metaphlan/merged_abundance_table.tsv"), :family; samplestart=3)
+    @test size(merge_profile_2) == (13, 6)
+    @test merge_profile_2["f__Micrococcaceae", "SRS014464-Anterior_nares"] == 0.0
+    merge_profile_3 = metaphlan_profiles(joinpath(@__DIR__, "files/metaphlan/merged_abundance_table.tsv"), 7; samplestart=3)
+    @test size(merge_profile_3) == (16, 6)
+    @test merge_profile_3["s__Haemophilus_haemolyticus", 3] == 1.35528
+    # Nothing reads this CSV back, so write it to a scratch directory instead of
+    # leaving an artifact in the checked-in fixture directory.
+    mktempdir() do tmp
+        out = joinpath(tmp, "merged_abundance_table2.csv")
+        CSV.write(out, merge_profile_1)
+        @test isfile(out)
+    end
+
+    profiles = filter(f-> contains(f, "_profile.tsv"), readdir(joinpath(@__DIR__, "files/metaphlan"), join=true))
+    @test_throws ArgumentError metaphlan_profiles(profiles; samples = ["sample1"])
+    multi_profile_1 = metaphlan_profiles(profiles; samples=["sample$i" for i in 1:length(profiles)])
+    @test abundances(multi_profile_1) == abundances(metaphlan_profiles(profiles))
+    @test size(multi_profile_1) == (62, 6)
+    @test multi_profile_1["p__Firmicutes", "sample1"] == 68.90167
+    multi_profile_2 = metaphlan_profiles(profiles, 3; samples=["sample$i" for i in 1:length(profiles)])
+    @test abundances(multi_profile_2) == abundances(metaphlan_profiles(profiles, :class))
+    @test size(multi_profile_2) == (6,6)
+    @test multi_profile_2["c__Bacteroidia", "sample1"] == 31.09833
+
+    taxstring = "k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_smithii"
+    taxa = parsetaxa(taxstring)
+    @test length(taxa) == 7
+    @test parsetaxon(taxstring, 1) == Taxon("Archaea", :kingdom)
+    @test parsetaxon(taxstring, :family) == Taxon("Methanobacteriaceae", :family)
+    @test parsetaxon(taxstring) == Taxon("Methanobrevibacter_smithii", :species)
+    @test_throws ArgumentError parsetaxon(taxstring, 8)
+
+    @test parsetaxon("k__Archaea|p__Euryarchaeota|c__Methanobacteria", 2) == Taxon("Euryarchaeota", :phylum)
+    @test parsetaxon("k__Archaea|p__Euryarchaeota|c__Methanobacteria") == Taxon("Methanobacteria", :class)
+end
+
+@testset "HUMAnN" begin
+    p1 = humann_profile(joinpath(@__DIR__, "files/humann/single_1.tsv"))
+    p2 = humann_profile(joinpath(@__DIR__, "files/humann/single_2.tsv"))
+    @test p1 isa CommunityProfile
+    @test size(p1) == (560, 1)
+    @test samplenames(p1) == ["single_1"]
+    @test samplenames(humann_profile(joinpath(@__DIR__, "files/humann/single_1.tsv"); sample = "sample1")) == ["sample1"]
+    @test samplenames(humann_profile(joinpath(@__DIR__, "files/humann/single_1.tsv"); sample = MicrobiomeSample("sample1"))) == ["sample1"]
+
+    @test all(f-> !hastaxon(f), features(p1)) # unstratified
+    @test all(f-> !occursin('|', name(f)), features(p1))
+
+    pj = humann_profiles(joinpath(@__DIR__, "files/humann/joined.tsv"))
+    @test size(pj) == (560, 2)
+    @test isempty(setdiff(features(pj), features(commjoin(p1, p2))))
+    @test samplenames(pj) == samplenames(commjoin(p1, p2))
+
+    pj_strat = humann_profiles(joinpath(@__DIR__, "files/humann/joined.tsv"); stratified = true)
+    @test size(pj_strat) == (1358, 2)
+    @test !isempty(setdiff(features(pj_strat), features(pj)))
+    @test isempty(setdiff(featurenames(pj_strat), featurenames(pj)))
+    @test isempty(setdiff(features(filter(!hastaxon, pj_strat)), features(pj)))
+    # Write the stratified profile to a scratch directory so the test run does
+    # not leave an artifact in the checked-in fixture directory.
+    mktempdir() do tmp
+        out = joinpath(tmp, "joined_roundtrip.tsv")
+        CSV.write(out, pj_strat; delim='\t')
+        @test isfile(out)
+    end
+end
+
+end # module
diff --git a/test/Project.toml b/test/Project.toml
index 62d4a1c..fa9994f 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -3,5 +3,6 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 Microbiome = "3bd8f0ae-a0f2-5238-a5af-e1b399a4940c"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+ReTest = "e0db7c4e-2690-44b9-bad6-7687da720f89"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/runtests.jl b/test/runtests.jl
index 257f777..1be1bd2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,114 +1,6 @@
-using Random
-using Test
-using BiobakeryUtils
-using BiobakeryUtils.Conda
-using SparseArrays
-using DelimitedFiles
-using CSV
-
-isdir(Conda.bin_dir(:BiobakeryUtils)) || BiobakeryUtils.install_deps()
-ENV["PATH"] = ENV["PATH"] * ":" * Conda.bin_dir(:BiobakeryUtils)
-
-@testset "CLI" begin
-    @testset "Utilities" begin
-        cmd = ["thing", "foo_bar"]
-        cmd2 = copy(cmd)
-
-        BiobakeryUtils.add_cli_kwargs!(cmd, Dict(:some_thing=> "foo", :bool=> true))
-        @test all(cmd .== ["thing", "foo_bar", "--some_thing", "foo", "--bool"])
-        BiobakeryUtils.add_cli_kwargs!(cmd2, Dict(:some_thing=> "foo", :bool=> true); optunderscores=false)
-        @test all(cmd2 .== ["thing", "foo_bar", "--some-thing", "foo", "--bool"])
-    end
-
-    @testset "Metaphlan" begin
-        @test BiobakeryUtils.check_for_install("metaphlan") |> isnothing
-        @test BiobakeryUtils.check_for_install("merge_metaphlan_tables.py") |> isnothing
-
-        @test metaphlan("", ""; help=true).exitcode == 0
-
-        profiles = filter(f-> contains(f, "_profile.tsv"), readdir("files/metaphlan", join=true))
-        @test metaphlan_merge(profiles, "files/metaphlan/merged_abundance_table.tsv").exitcode == 0
-    end
-
-    @testset "Humann" begin
-        @test BiobakeryUtils.check_for_install("humann") |> isnothing
-        @test BiobakeryUtils.check_for_install("humann_rename_table") |> isnothing
-        @test BiobakeryUtils.check_for_install("humann_renorm_table") |> isnothing
-        @test BiobakeryUtils.check_for_install("humann_join_tables") |> isnothing
-        @test BiobakeryUtils.check_for_install("humann") |> isnothing
-        @test humann("", ""; help=true).exitcode == 0
-
-    end
-end
-
-@testset "Metaphlan" begin
-    profile_1 = metaphlan_profile("files/metaphlan/SRS014464-Anterior_nares_profile.tsv"; sample="SRS014464")
-    @test first(abundances(profile_1["Bacteria", "SRS014464"])) == 100.0
-    @test first(abundances(profile_1["Pseudomonadales", "SRS014464"])) == 97.28734
-    @test size(profile_1) == (13, 1)
-    profile_2 = metaphlan_profile("files/metaphlan/SRS014459-Stool_profile.tsv", 3)
-    @test size(profile_2) == (2, 1)
-    @test first(abundances(profile_2["Firmicutes", "SRS014459-Stool_profile"])) == 68.90167
-    profile_3 = metaphlan_profile("files/metaphlan/SRS014464-Anterior_nares_profile.tsv", :phylum)
-    @test size(profile_3) == (2, 1)
-    @test first(abundances(profile_3["Proteobacteria", 1])) == 97.28734
-
-    merge_profile_1 = metaphlan_profiles("files/metaphlan/merged_abundance_table.tsv"; samplestart=3)
-    @test size(merge_profile_1) == (62, 6)
-    @test first(abundances(merge_profile_1["Moraxella", 5])) == 97.28734
-    merge_profile_2 = metaphlan_profiles("files/metaphlan/merged_abundance_table.tsv", :family; samplestart=3)
-    @test size(merge_profile_2) == (13, 6)
-    @test first(abundances(merge_profile_2["Micrococcaceae", "SRS014464-Anterior_nares"])) == 0.0
-    merge_profile_3 = metaphlan_profiles("files/metaphlan/merged_abundance_table.tsv", 7; samplestart=3)
-    @test size(merge_profile_3) == (16, 6)
-    @test first(abundances(merge_profile_3["Haemophilus_haemolyticus", 3])) == 1.35528
-    CSV.write("files/metaphlan/merged_abundance_table2.csv", merge_profile_1)
-
-    profiles = filter(f-> contains(f, "_profile.tsv"), readdir("files/metaphlan", join=true))
-    @test_throws ArgumentError metaphlan_profiles(profiles; samples = ["sample1"])
-    multi_profile_1 = metaphlan_profiles(profiles; samples=["sample$i" for i in 1:length(profiles)])
-    @test abundances(multi_profile_1) == abundances(metaphlan_profiles(profiles))
-    @test size(multi_profile_1) == (62, 6)
-    @test first(abundances(multi_profile_1["Firmicutes", "sample1"])) == 68.90167
-    multi_profile_2 = metaphlan_profiles(profiles, 3; samples=["sample$i" for i in 1:length(profiles)])
-    @test abundances(multi_profile_2) == abundances(metaphlan_profiles(profiles, :class))
-    @test size(multi_profile_2) == (6,6)
-    @test first(abundances(multi_profile_2["Bacteroidia", "sample1"])) == 31.09833
-
-    taxstring = "k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_smithii"
-    taxa = parsetaxa(taxstring)
-    @test length(taxa) == 7
-    @test parsetaxon(taxstring, 1) == Taxon("Archaea", :kingdom)
-    @test parsetaxon(taxstring, :family) == Taxon("Methanobacteriaceae", :family)
-    @test parsetaxon(taxstring) == Taxon("Methanobrevibacter_smithii", :species)
-    @test_throws ArgumentError parsetaxon(taxstring, 8)
-
-    @test parsetaxon("k__Archaea|p__Euryarchaeota|c__Methanobacteria", 2) == Taxon("Euryarchaeota", :phylum)
-    @test parsetaxon("k__Archaea|p__Euryarchaeota|c__Methanobacteria") == Taxon("Methanobacteria", :class)
-end
-
-@testset "HUMAnN" begin
-    p1 = humann_profile("files/humann/single_1.tsv")
-    p2 = humann_profile("files/humann/single_2.tsv")
-    @test p1 isa CommunityProfile
-    @test size(p1) == (560, 1)
-    @test samplenames(p1) == ["single_1"]
-    @test samplenames(humann_profile("files/humann/single_1.tsv"; sample = "sample1")) == ["sample1"]
-    @test samplenames(humann_profile("files/humann/single_1.tsv"; sample = MicrobiomeSample("sample1"))) == ["sample1"]
-
-    @test all(f-> !hastaxon(f), features(p1)) # unstratified
-    @test all(f-> !occursin('|', name(f)), features(p1))
-
-    pj = humann_profiles("files/humann/joined.tsv")
-    @test size(pj) == (560, 2)
-    @test isempty(setdiff(features(pj), features(commjoin(p1, p2))))
-    @test samplenames(pj) == samplenames(commjoin(p1, p2))
-
-    pj_strat = humann_profiles("files/humann/joined.tsv"; stratified = true)
-    @test size(pj_strat) == (1358, 2)
-    @test !isempty(setdiff(features(pj_strat), features(pj)))
-    @test isempty(setdiff(featurenames(pj_strat), featurenames(pj)))
-    @test isempty(setdiff(features(filter(!hastaxon, pj_strat)), features(pj)))
-    CSV.write("files/humann/joined_roundtrip.tsv", pj_strat; delim='\t')
-end
+include("BiobakeryUtilsTests.jl")
+BiobakeryUtilsTests.runtests()
 
+# # uncomment if there are ever inline tests
+# using BiobakeryUtils
+# BiobakeryUtils.runtests()