From 3cdae0446f680e2c94c05263f7f1a2cbb404f62d Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 6 May 2024 18:00:02 +0200 Subject: [PATCH 01/34] Run Tarpaulin job outside a container, because without cmake building umi-transfer fails. --- .github/workflows/testing.yml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 438f2be..7b66fa9 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -102,16 +102,26 @@ jobs: tarpaulin: name: Determine test coverage with Tarpaulin runs-on: ubuntu-latest - container: - image: xd009642/tarpaulin:develop-nightly - options: --security-opt seccomp=unconfined steps: - name: Checkout repository uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + with: + components: tarpaulin + + - name: Cache Rust toolchain + uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2 + - name: Generate code coverage run: | - cargo +nightly tarpaulin --workspace --benches --follow-exec --timeout 120 --out Xml + cargo-tarpaulin --tests --bins --follow-exec --timeout 120 --out Xml + + - uses: actions/upload-artifact@v4 + with: + name: TarpaulinCodeCoverage.xml + path: cobertura.xml From ba816514f7f0a5cff8ec50dbe15331d4a6e4e352 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 9 Apr 2024 20:25:13 +0200 Subject: [PATCH 02/34] Update dependencies. 
--- Cargo.lock | 604 ++++++++++++++++++++--------------------------------- Cargo.toml | 5 +- 2 files changed, 232 insertions(+), 377 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8bf1fbe..c6d88ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,67 +10,66 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" -version = "1.0.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "anstream" -version = "0.3.2" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", - "is-terminal", "utf8parse", ] [[package]] name = "anstyle" -version = "1.0.1" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anstyle-parse" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = 
"e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys 0.48.0", + "windows-sys", ] [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" dependencies = [ "anstyle", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] name = "anyhow" -version = "1.0.72" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" +checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" [[package]] name = "approx" @@ -83,9 +82,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.12" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6" +checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" dependencies = [ "anstyle", "bstr", @@ -98,9 +97,9 @@ dependencies = [ [[package]] name = "assert_fs" -version = "1.0.13" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f070617a68e5c2ed5d06ee8dd620ee18fb72b99f6c094bed34cf8ab07c875b48" +checksum = "2cd762e110c8ed629b11b6cde59458cc1c71de78ebbcc30099fc8e0403a2a2ec" dependencies = [ "anstyle", "doc-comment", @@ -111,22 +110,11 @@ dependencies = [ "tempfile", ] -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "bio" @@ -169,14 +157,14 @@ dependencies = [ [[package]] name = "bio-types" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c915bf6c578d40e1e497f8c571a4514bc89c3195cec2abb8be6dd5500405c752" +checksum = "9d45749b87f21808051025e9bf714d14ff4627f9d8ca967eade6946ea769aa4a" dependencies = [ "derive-new", "lazy_static", "regex", - "strum_macros 0.24.3", + "strum_macros 0.25.3", "thiserror", ] @@ -197,21 +185,15 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.3.3" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "bstr" -version = "1.6.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", "regex-automata", @@ -230,21 +212,15 @@ dependencies = [ [[package]] name = "bytecount" -version = "0.6.3" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" +checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "cc" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cfg-if" @@ -254,20 +230,19 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.3.19" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" dependencies = [ "clap_builder", "clap_derive", - "once_cell", ] [[package]] name = "clap_builder" -version = "4.3.19" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" dependencies = [ "anstream", "anstyle", @@ -277,21 +252,21 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.3.12" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" +checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.58", ] [[package]] name = "clap_lex" -version = "0.5.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "colorchoice" @@ -301,31 
+276,56 @@ checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "console" -version = "0.15.7" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", "lazy_static", "libc", "unicode-width", - "windows-sys 0.45.0", + "windows-sys", ] [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + [[package]] name = "csv" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" dependencies = [ "csv-core", "itoa", @@ -335,9 +335,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" dependencies = [ "memchr", ] @@ -385,9 +385,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "encode_unicode" @@ -416,31 +416,26 @@ dependencies = [ ] [[package]] -name = "errno" -version = "0.3.2" +name = "equivalent" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys 0.48.0", -] +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] -name = "errno-dragonfly" -version = "0.1.2" +name = "errno" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "cc", "libc", + "windows-sys", ] [[package]] name = "fastrand" -version = "2.0.0" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" +checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" [[package]] name = "feature-probe" @@ -462,9 +457,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.26" +version = "1.0.28" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "miniz_oxide", @@ -479,12 +474,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - [[package]] name = "fxhash" version = "0.2.1" @@ -496,9 +485,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.10" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", "libc", @@ -519,33 +508,33 @@ dependencies = [ [[package]] name = "globset" -version = "0.4.12" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aca8bbd8e0707c1887a8bbb7e6b40e228f251ff5d62c8220a4a7a53c73aff006" +checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" dependencies = [ "aho-corasick", "bstr", - "fnv", "log", - "regex", + "regex-automata", + "regex-syntax", ] [[package]] name = "globwalk" -version = "0.8.1" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93e3af942408868f6934a7b85134a3230832b9977cf66125df2f9edcfce4ddcc" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" dependencies = [ - "bitflags 1.3.2", + "bitflags", "ignore", "walkdir", ] [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = 
"290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" [[package]] name = "heck" @@ -563,85 +552,59 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] -name = "hermit-abi" -version = "0.1.19" +name = "heck" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "ignore" -version = "0.4.20" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492" +checksum = "b46810df39e66e925525d6e38ce1e7f6e1d208f72dc39757880fcb66e2c58af1" dependencies = [ + "crossbeam-deque", "globset", - "lazy_static", "log", "memchr", - "regex", + "regex-automata", "same-file", - "thread_local", "walkdir", "winapi-util", ] [[package]] name = "indexmap" -version = "1.9.3" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ - "autocfg", + "equivalent", "hashbrown", ] -[[package]] -name = "indicatif" -version = "0.17.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ff8cc23a7393a397ed1d7f56e6365cba772aba9f9912ab968b03043c395d057" -dependencies = [ - "console", - "instant", - "number_prefix", - "portable-atomic", - 
"unicode-width", -] - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - [[package]] name = "is-terminal" -version = "0.4.9" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ - "hermit-abi 0.3.2", - "rustix", - "windows-sys 0.48.0", + "hermit-abi", + "libc", + "windows-sys", ] [[package]] name = "is_ci" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616cde7c720bb2bb5824a224687d8f77bfd38922027f01d825cd7453be5099fb" +checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" [[package]] name = "itertools" @@ -663,9 +626,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "lazy_static" @@ -675,33 +638,33 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.147" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libm" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" +checksum = 
"4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "linux-raw-sys" -version = "0.4.5" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "log" -version = "0.4.19" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "matrixmultiply" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" +checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" dependencies = [ "autocfg", "rawpointer", @@ -709,15 +672,15 @@ dependencies = [ [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -790,20 +753,19 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] name = "num-complex" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +checksum = 
"23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" dependencies = [ "num-traits", ] [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] @@ -820,26 +782,14 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", "libm", ] -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - -[[package]] -name = "once_cell" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" - [[package]] name = "ordered-float" version = "1.1.1" @@ -851,9 +801,9 @@ dependencies = [ [[package]] name = "owo-colors" -version = "3.5.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +checksum = "caff54706df99d2a78a5a4e3455ff45448d81ef1bb63c22cd14052ca0e993a3f" dependencies = [ "supports-color", ] @@ -866,20 +816,14 @@ checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = "petgraph" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" +checksum = 
"e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", "indexmap", ] -[[package]] -name = "portable-atomic" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f32154ba0af3a075eefa1eda8bb414ee928f62303a54ea85b8d6638ff1a6ee9e" - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -888,14 +832,13 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "predicates" -version = "3.0.3" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" +checksum = "68b87bfd4605926cdfefc1c3b5f8fe560e3feca9d5552cf68c466d3d8236c7e8" dependencies = [ "anstyle", "difflib", "float-cmp", - "itertools", "normalize-line-endings", "predicates-core", "regex", @@ -943,18 +886,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.32" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -1005,20 +948,11 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 
1.3.2", -] - [[package]] name = "regex" -version = "1.9.1" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", @@ -1028,9 +962,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.4" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", @@ -1039,9 +973,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "rustc_version" @@ -1054,28 +988,28 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.4" +version = "0.38.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" +checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" dependencies = [ - "bitflags 2.3.3", + "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] name = "rustversion" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" +checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -1094,22 +1028,22 @@ checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" [[package]] name = "serde" -version = "1.0.180" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.180" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", + "syn 2.0.58", ] [[package]] @@ -1145,9 +1079,9 @@ dependencies = [ [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "strum" @@ -1170,24 +1104,24 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.24.3" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck 0.4.1", "proc-macro2", "quote", "rustversion", - "syn 1.0.109", + "syn 2.0.58", ] [[package]] name = "supports-color" -version = "1.3.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8ba6faf2ca7ee42fdd458f4347ae0a9bd6bcc445ad7cb57ad82b383f18870d6f" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" dependencies = [ - "atty", + "is-terminal", "is_ci", ] @@ -1204,9 +1138,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.28" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", @@ -1215,15 +1149,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.7.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", "rustix", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -1234,32 +1167,22 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "thiserror" -version = "1.0.44" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.44" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.28", -] - -[[package]] -name = "thread_local" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" -dependencies = [ - "cfg-if", - "once_cell", + "syn 2.0.58", ] [[package]] @@ -1270,13 +1193,13 @@ checksum = "22048bc95dfb2ffd05b1ff9a756290a009224b60b2f0e7525faeee7603851e63" [[package]] name = "typenum" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "umi-transfer" -version = "1.0.0" +version = "1.5.0" dependencies = [ "anyhow", "assert_cmd", @@ -1286,7 +1209,6 @@ dependencies = [ "dialoguer", "file-format", "flate2", - "indicatif", "itertools", "lazy_static", "owo-colors", @@ -1296,21 +1218,21 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-segmentation" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "utf8parse" @@ -1344,9 +1266,9 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.3.3" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -1376,9 +1298,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] @@ -1391,138 +1313,72 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.1", -] - -[[package]] -name = "windows-targets" -version = "0.42.2" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", + "windows-targets", ] [[package]] name = "windows-targets" -version = "0.48.1" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm 0.48.0", - "windows_aarch64_msvc 0.48.0", - "windows_i686_gnu 0.48.0", - "windows_i686_msvc 0.48.0", - "windows_x86_64_gnu 0.48.0", - "windows_x86_64_gnullvm 0.48.0", - "windows_x86_64_msvc 0.48.0", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" - -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" -version = "0.48.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" -version = "0.48.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" -version = "0.48.0" +version = "0.52.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "zeroize" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" diff --git a/Cargo.toml b/Cargo.toml index c778876..01d5571 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "umi-transfer" -version = "1.0.0" +version = "1.5.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -9,14 +9,13 @@ edition = "2021" clap = { version = "4.3.11", features = ["derive"] } bio = "0.41.0" lazy_static = "1.4" -indicatif = "0.17.0" flate2 = "1.0.24" itertools = "0.10.5" file-format = "0.7.0" anyhow = "1.0.71" dialoguer = "0.10.4" regex = "1.8.1" -owo-colors = { version = "3.5", features = ["supports-colors"] } +owo-colors = { version = "4.0", features = ["supports-colors"] } [dev-dependencies] assert_cmd = "2.0.11" From 833fad5815e32dae6c4fe5dd4f574f40fca0b8ab Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 10 Apr 2024 13:52:02 +0200 Subject: [PATCH 03/34] Add gzp library for parallel compression. 
--- Cargo.lock | 260 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + 2 files changed, 261 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index c6d88ef..639e923 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -200,6 +200,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + [[package]] name = "bv" version = "0.11.1" @@ -222,6 +228,18 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + +[[package]] +name = "cc" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" + [[package]] name = "cfg-if" version = "1.0.0" @@ -268,6 +286,15 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] + [[package]] name = "colorchoice" version = "1.0.0" @@ -287,6 +314,17 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "core_affinity" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622892f5635ce1fc38c8f16dfc938553ed64af482edb5e150bf4caedbfcb2304" +dependencies = [ + "libc", + "num_cpus", + "winapi", +] + [[package]] name = "crc32fast" version = "1.4.0" @@ -462,6 +500,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", + "libz-sys", "miniz_oxide", ] @@ -474,6 +513,31 @@ dependencies = [ "num-traits", ] +[[package]] +name = "flume" +version = "0.10.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" +dependencies = [ + "futures-core", + "futures-sink", + "nanorand", + "pin-project", + "spin", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + [[package]] name = "fxhash" version = "0.2.1" @@ -490,8 +554,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -530,6 +596,23 @@ dependencies = [ "walkdir", ] +[[package]] +name = "gzp" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c65d1899521a11810501b50b898464d133e1afc96703cff57726964cfa7baf" +dependencies = [ + "byteorder", + "bytes", + "core_affinity", + "flate2", + "flume", + "libdeflater", + "libz-sys", + "num_cpus", + "thiserror", +] + [[package]] name = "hashbrown" version = "0.14.3" @@ -630,6 +713,15 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -642,18 +734,59 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "libdeflate-sys" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f7b0817f85e2ba608892f30fbf4c9d03f3ebf9db0c952d1b7c8f7387b54785" +dependencies = [ + "cc", +] + +[[package]] +name = "libdeflater" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "671e63282f642c7bcc7d292b212d5a4739fef02a77fe98429a75d308f96e7931" +dependencies = [ + "libdeflate-sys", +] + [[package]] name = "libm" version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +[[package]] +name = "libz-sys" +version = "1.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9" +dependencies = [ + "cc", + "cmake", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.21" @@ -723,6 +856,15 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "nanorand" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" +dependencies = [ + "getrandom", +] + [[package]] name = "ndarray" version = "0.15.6" @@ -790,6 +932,22 @@ dependencies = [ "libm", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + [[package]] name = "ordered-float" version = "1.1.1" @@ -824,6 +982,32 @@ dependencies = [ "indexmap", ] +[[package]] +name = "pin-project" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.58", +] + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -1020,6 +1204,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "semver" version = "0.1.20" @@ -1064,6 +1254,15 @@ dependencies = [ "paste", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + [[package]] name = "statrs" version = "0.15.0" @@ -1209,6 +1408,7 @@ dependencies = [ "dialoguer", "file-format", "flate2", + "gzp", "itertools", "lazy_static", "owo-colors", @@ -1240,6 +1440,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "vec_map" version = "0.8.2" @@ -1280,6 +1486,60 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.58", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.58", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 01d5571..f4c38c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ anyhow = "1.0.71" dialoguer = "0.10.4" regex = "1.8.1" owo-colors = { version = "4.0", features = ["supports-colors"] } +gzp = "0.11.3" [dev-dependencies] assert_cmd = "2.0.11" From 3da0567f2a5bfc4480e9f6350d2d322d8ee219ac Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 10 Apr 2024 16:11:04 +0200 Subject: [PATCH 04/34] Drop custom types, due to name collisions those just complicated matters. --- src/file_io.rs | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 8eda587..eeea6de 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -3,35 +3,30 @@ use anyhow::{anyhow, Context, Result}; use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; use regex::Regex; -use std::{fs, path::Path, path::PathBuf}; - -// Defining types for simplicity -type File = std::fs::File; -type Fastq = std::io::BufReader; -type Gzip = flate2::bufread::MultiGzDecoder; +use std::{fs, fs::File, path::Path, path::PathBuf}; // Enum for the two acceptable input file formats: '.fastq' and '.fastq.gz' -pub enum ReadFile { - Fastq(std::io::BufReader), - Gzip(Box), +pub enum InputFile { + Plain(std::io::BufReader), + Compressed(Box>>), } -// Implement read for ReadFile enum -impl std::io::Read for ReadFile { +// Implement read for InputFile enum +impl std::io::Read for InputFile { fn read(&mut self, into: &mut [u8]) -> 
std::io::Result { match self { - ReadFile::Fastq(buf_reader) => buf_reader.read(into), - ReadFile::Gzip(buf_reader) => buf_reader.read(into), + InputFile::Plain(buf_reader) => buf_reader.read(into), + InputFile::Compressed(buf_reader) => buf_reader.read(into), } } } // Enum for the two accepted output formats, '.fastq' and '.fastq.gz' pub enum OutputFile { - Fastq { + Plain { read: bio::io::fastq::Writer, }, - Gzip { + Compressed { read: bio::io::fastq::Writer>, }, } @@ -45,12 +40,12 @@ impl OutputFile { s: bio::io::fastq::Record, ) -> Result { match self { - OutputFile::Fastq { mut read } => match read.write(header, desc, s.seq(), s.qual()) { - Ok(_) => Ok(OutputFile::Fastq { read }), + OutputFile::Plain { mut read } => match read.write(header, desc, s.seq(), s.qual()) { + Ok(_) => Ok(OutputFile::Plain { read }), Err(_) => Err(anyhow!(RuntimeErrors::ReadWriteError(s))), }, - OutputFile::Gzip { mut read } => match read.write(header, desc, s.seq(), s.qual()) { - Ok(_) => Ok(OutputFile::Gzip { read }), + OutputFile::Compressed { mut read } => match read.write(header, desc, s.seq(), s.qual()) { + Ok(_) => Ok(OutputFile::Compressed { read }), Err(_) => Err(anyhow!(RuntimeErrors::ReadWriteError(s))), }, } @@ -58,21 +53,21 @@ impl OutputFile { } // Read input file to Reader. Automatically scans if input is compressed with file-format crate. 
-pub fn read_fastq(path: &PathBuf) -> Result>> { +pub fn read_fastq(path: &PathBuf) -> Result>> { fs::metadata(path).map_err(|_e| anyhow!(RuntimeErrors::FileNotFound(Some(path.into()))))?; let format = FileFormat::from_file(path).context("Failed to determine file format")?; - let reader: ReadFile = match format { + let reader: InputFile = match format { FileFormat::Gzip => { let file = File::open(path) .map(std::io::BufReader::new) .with_context(|| format!("Failed to open file: {:?}", path))?; - ReadFile::Gzip(Box::new(flate2::bufread::MultiGzDecoder::new(file))) + InputFile::Compressed(Box::new(flate2::bufread::MultiGzDecoder::new(file))) } _ => { let file = File::open(path).with_context(|| format!("Failed to open file: {:?}", path))?; - ReadFile::Fastq(std::io::BufReader::new(file)) + InputFile::Plain(std::io::BufReader::new(file)) } }; @@ -82,14 +77,14 @@ pub fn read_fastq(path: &PathBuf) -> Result Result { if *compress { - Ok(OutputFile::Gzip { + Ok(OutputFile::Compressed { read: std::fs::File::create(name.as_path()) .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) .map(bio::io::fastq::Writer::new) .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, }) } else { - Ok(OutputFile::Fastq { + Ok(OutputFile::Plain { read: std::fs::File::create(name.as_path()) .map(bio::io::fastq::Writer::new) .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, From 5ecda090b76646f5717fff88f9b778c6bf93be28 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 10 Apr 2024 20:32:12 +0200 Subject: [PATCH 05/34] Doesn't work like this...dyn ZWriter cannot be sent between threads safely the trait Send is not implemented for dyn ZWriter --- src/file_io.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index eeea6de..fa33bbb 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -2,8 +2,9 @@ use super::umi_errors::RuntimeErrors; use anyhow::{anyhow, 
Context, Result}; use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; +use gzp::{deflate::Gzip, ZBuilder, ZWriter}; use regex::Regex; -use std::{fs, fs::File, path::Path, path::PathBuf}; +use std::{fs, fs::File, io::Write, path::Path, path::PathBuf}; // Enum for the two acceptable input file formats: '.fastq' and '.fastq.gz' pub enum InputFile { @@ -27,7 +28,7 @@ pub enum OutputFile { read: bio::io::fastq::Writer, }, Compressed { - read: bio::io::fastq::Writer>, + read: bio::io::fastq::Writer>, }, } @@ -39,12 +40,13 @@ impl OutputFile { desc: Option<&str>, s: bio::io::fastq::Record, ) -> Result { + let record = bio::io::fastq::Record::with_attrs(header, desc, s.seq(), s.qual()); match self { - OutputFile::Plain { mut read } => match read.write(header, desc, s.seq(), s.qual()) { + OutputFile::Plain { mut read } => match read.write_record(&record) { Ok(_) => Ok(OutputFile::Plain { read }), Err(_) => Err(anyhow!(RuntimeErrors::ReadWriteError(s))), }, - OutputFile::Compressed { mut read } => match read.write(header, desc, s.seq(), s.qual()) { + OutputFile::Compressed { mut read } => match read.write_record(&record) { Ok(_) => Ok(OutputFile::Compressed { read }), Err(_) => Err(anyhow!(RuntimeErrors::ReadWriteError(s))), }, @@ -79,8 +81,11 @@ pub fn output_file(name: PathBuf, compress: &bool) -> Result { if *compress { Ok(OutputFile::Compressed { read: std::fs::File::create(name.as_path()) - .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) - .map(bio::io::fastq::Writer::new) + .map(|w| { + let writer = ZBuilder::::new().num_threads(0).from_writer(w); + let writer: Box = Box::new(writer); + bio::io::fastq::Writer::new(writer) + }) .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, }) } else { From 3f9480687e88f92a951043807ffa3232548c8ed3 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 11 Apr 2024 16:37:46 +0200 Subject: [PATCH 06/34] Intermediate commit, refactor file for clarity. 
--- src/file_io.rs | 84 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index fa33bbb..8158fe1 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -2,10 +2,14 @@ use super::umi_errors::RuntimeErrors; use anyhow::{anyhow, Context, Result}; use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; -use gzp::{deflate::Gzip, ZBuilder, ZWriter}; +use gzp::{deflate::Gzip, par::compress::Compression, ZBuilder, ZWriter}; use regex::Regex; use std::{fs, fs::File, io::Write, path::Path, path::PathBuf}; +//////////////////////////////////////////////////////////////// +// READ INPUT FILE +//////////////////////////////////////////////////////////////// + // Enum for the two acceptable input file formats: '.fastq' and '.fastq.gz' pub enum InputFile { Plain(std::io::BufReader), @@ -22,14 +26,37 @@ impl std::io::Read for InputFile { } } +// Read input file to Reader. Automatically scans if input is compressed with file-format crate. 
+pub fn read_fastq(path: &PathBuf) -> Result>> { + fs::metadata(path).map_err(|_e| anyhow!(RuntimeErrors::FileNotFound(Some(path.into()))))?; + + let format = FileFormat::from_file(path).context("Failed to determine file format")?; + let reader: InputFile = match format { + FileFormat::Gzip => { + let file = File::open(path) + .map(std::io::BufReader::new) + .with_context(|| format!("Failed to open file: {:?}", path))?; + InputFile::Compressed(Box::new(flate2::bufread::MultiGzDecoder::new(file))) + } + _ => { + let file = + File::open(path).with_context(|| format!("Failed to open file: {:?}", path))?; + InputFile::Plain(std::io::BufReader::new(file)) + } + }; + + Ok(bio::io::fastq::Reader::new(reader)) +} + + +//////////////////////////////////////////////////////////////// +// WRITE OUTPUT FILE +//////////////////////////////////////////////////////////////// + // Enum for the two accepted output formats, '.fastq' and '.fastq.gz' pub enum OutputFile { - Plain { - read: bio::io::fastq::Writer, - }, - Compressed { - read: bio::io::fastq::Writer>, - }, + Plain, + Compressed, } // Implement write for OutputFile enum @@ -52,28 +79,25 @@ impl OutputFile { }, } } -} - -// Read input file to Reader. Automatically scans if input is compressed with file-format crate. 
-pub fn read_fastq(path: &PathBuf) -> Result>> { - fs::metadata(path).map_err(|_e| anyhow!(RuntimeErrors::FileNotFound(Some(path.into()))))?; - - let format = FileFormat::from_file(path).context("Failed to determine file format")?; - let reader: InputFile = match format { - FileFormat::Gzip => { - let file = File::open(path) - .map(std::io::BufReader::new) - .with_context(|| format!("Failed to open file: {:?}", path))?; - InputFile::Compressed(Box::new(flate2::bufread::MultiGzDecoder::new(file))) - } - _ => { - let file = - File::open(path).with_context(|| format!("Failed to open file: {:?}", path))?; - InputFile::Plain(std::io::BufReader::new(file)) + /// Create a compressor writer matching the selected format + fn create_compressor( + &self, + writer: W, + num_threads: usize, + compression_level: u32, + pin_at: Option, + ) -> Box + where + W: Write + Send + 'static, + { + match self { + OutputFile::Compressed => ZBuilder::::new() + .num_threads(num_threads) + .compression_level(Compression::new(compression_level)) + .pin_threads(pin_at) + .from_writer(writer) } - }; - - Ok(bio::io::fastq::Reader::new(reader)) + } } // Create output files @@ -119,6 +143,10 @@ pub fn write_to_file( } } +//////////////////////////////////////////////////////////////// +// OTHER UTILITIES +//////////////////////////////////////////////////////////////// + // Checks whether an output path exists. pub fn check_outputpath(path: PathBuf, force: &bool) -> Result { // Skip overwrite prompt for "/dev/null" -> can/will be used for singletons. From b585b226d7a627cc015af6bde4ffbeca51f05213 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 11 Apr 2024 17:52:16 +0200 Subject: [PATCH 07/34] Refactor functions and disentagle editing the FastQ records from writing them to the output file. 
--- src/umi_external.rs | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/src/umi_external.rs b/src/umi_external.rs index cbeee4c..0de0e48 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -123,8 +123,8 @@ pub fn run(args: OptsExternal) -> Result { println!("Output 1 will be saved to: {}", output1.to_string_lossy()); println!("Output 2 will be saved to: {}", output2.to_string_lossy()); - let mut write_file_r1 = file_io::output_file(output1, &args.gzip)?; - let mut write_file_r2 = file_io::output_file(output2, &args.gzip)?; + //let mut write_file_r1 = file_io::output_file(output1, &args.gzip)?; + //let mut write_file_r2 = file_io::output_file(output2, &args.gzip)?; // Record counter let mut counter: i32 = 0; @@ -133,8 +133,8 @@ pub fn run(args: OptsExternal) -> Result { // Iterate over records in input files for (r1_rec_res, ru_rec_res, r2_rec_res) in izip!(r1, ru, r2) { - let r1_rec = r1_rec_res?; - let r2_rec = r2_rec_res?; + let mut r1_rec = r1_rec_res?; + let mut r2_rec = r2_rec_res?; let ru_rec = ru_rec_res?; // Step counter @@ -143,13 +143,15 @@ pub fn run(args: OptsExternal) -> Result { if r1_rec.id().eq(ru_rec.id()) { // Write to Output file let read_nr = if edit_nr { Some(1) } else { None }; - write_file_r1 = file_io::write_to_file( + let r1_rec = update_record( r1_rec, - write_file_r1, ru_rec.seq(), args.delim.as_ref(), read_nr, )?; + + //TODO: Write record to output file + } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatch)); } @@ -157,13 +159,15 @@ pub fn run(args: OptsExternal) -> Result { if r2_rec.id().eq(ru_rec.id()) { // Write to Output file let read_nr = if edit_nr { Some(2) } else { None }; - write_file_r2 = file_io::write_to_file( + let r2_rec = update_record( r2_rec, - write_file_r2, ru_rec.seq(), args.delim.as_ref(), read_nr, )?; + + //TODO: Write record to output file + } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatch)); } @@ -171,3 +175,26 @@ pub fn 
run(args: OptsExternal) -> Result { println!("Processed {:?} records", counter); Ok(counter) } + + +// Updates the header and description of the reads accordingly +fn update_record( + input: bio::io::fastq::Record, + umi: &[u8], + umi_sep: Option<&String>, + edit_nr: Option, +) -> Result { + let delim = umi_sep.as_ref().map(|s| s.as_str()).unwrap_or(":"); // the delimiter for the UMI + if let Some(number) = edit_nr { + let new_id = &[input.id(), delim, std::str::from_utf8(umi).unwrap()].concat(); + let mut new_desc = String::from(input.desc().unwrap()); + new_desc.replace_range(0..1, &number.to_string()); + let desc: Option<&str> = Some(&new_desc); + let new_record = bio::io::fastq::Record::with_attrs(new_id, desc, input.seq(), input.qual()); + Ok(new_record) + } else { + let new_id = &[input.id(), delim, std::str::from_utf8(umi).unwrap()].concat(); + let new_record = bio::io::fastq::Record::with_attrs(new_id, input.desc(), input.seq(), input.qual()); + Ok(new_record) + } +} \ No newline at end of file From 7ed748394b0f72b99dd6fcfacdbcb981d387cc88 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 12 Apr 2024 19:23:23 +0200 Subject: [PATCH 08/34] Reimplement new writer logic for plain and compressed output. 
--- src/file_io.rs | 101 +++++++++++++------------------------------------ 1 file changed, 27 insertions(+), 74 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 8158fe1..fa8904e 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -4,7 +4,7 @@ use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; use gzp::{deflate::Gzip, par::compress::Compression, ZBuilder, ZWriter}; use regex::Regex; -use std::{fs, fs::File, io::Write, path::Path, path::PathBuf}; +use std::{fs, fs::File, io::BufWriter, io::Write, path::Path, path::PathBuf}; //////////////////////////////////////////////////////////////// // READ INPUT FILE @@ -55,91 +55,44 @@ pub fn read_fastq(path: &PathBuf) -> Result), + Compressed(Box), } -// Implement write for OutputFile enum -impl OutputFile { - pub fn write( - self, - header: &str, - desc: Option<&str>, - s: bio::io::fastq::Record, - ) -> Result { - let record = bio::io::fastq::Record::with_attrs(header, desc, s.seq(), s.qual()); +impl std::io::Write for OutputFile { + fn write(&mut self, buf: &[u8]) -> std::io::Result { match self { - OutputFile::Plain { mut read } => match read.write_record(&record) { - Ok(_) => Ok(OutputFile::Plain { read }), - Err(_) => Err(anyhow!(RuntimeErrors::ReadWriteError(s))), - }, - OutputFile::Compressed { mut read } => match read.write_record(&record) { - Ok(_) => Ok(OutputFile::Compressed { read }), - Err(_) => Err(anyhow!(RuntimeErrors::ReadWriteError(s))), - }, + OutputFile::Plain(writer) => writer.write(buf), + OutputFile::Compressed(writer) => writer.write(buf), } } - /// Create a compressor writer matching the selected format - fn create_compressor( - &self, - writer: W, - num_threads: usize, - compression_level: u32, - pin_at: Option, - ) -> Box - where - W: Write + Send + 'static, - { + + fn flush(&mut self) -> std::io::Result<()> { match self { - OutputFile::Compressed => ZBuilder::::new() - .num_threads(num_threads) - .compression_level(Compression::new(compression_level)) - 
.pin_threads(pin_at) - .from_writer(writer) + OutputFile::Plain(writer) => writer.flush(), + OutputFile::Compressed(writer) => writer.flush(), } } } -// Create output files -pub fn output_file(name: PathBuf, compress: &bool) -> Result { - if *compress { - Ok(OutputFile::Compressed { - read: std::fs::File::create(name.as_path()) - .map(|w| { - let writer = ZBuilder::::new().num_threads(0).from_writer(w); - let writer: Box = Box::new(writer); - bio::io::fastq::Writer::new(writer) - }) - .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, - }) - } else { - Ok(OutputFile::Plain { - read: std::fs::File::create(name.as_path()) - .map(bio::io::fastq::Writer::new) - .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, - }) - } -} - -// Writes record with properly inserted UMI to Output file -pub fn write_to_file( - input: bio::io::fastq::Record, - output: OutputFile, - umi: &[u8], - umi_sep: Option<&String>, - edit_nr: Option, +pub fn create_writer( + path: PathBuf, + compress: bool, + num_threads: usize, + compression_level: u32, + pin_at: Option, ) -> Result { - let s = input; - let delim = umi_sep.as_ref().map(|s| s.as_str()).unwrap_or(":"); // the delimiter for the UMI - if let Some(number) = edit_nr { - let header = &[s.id(), delim, std::str::from_utf8(umi).unwrap()].concat(); - let mut string = String::from(s.desc().unwrap()); - string.replace_range(0..1, &number.to_string()); - let desc: Option<&str> = Some(&string); - output.write(header, desc, s) + let file = File::create(&path) + .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(path.clone()))))?; + if compress { + let writer = ZBuilder::::new() + .num_threads(num_threads) + .compression_level(Compression::new(compression_level)) + .pin_threads(pin_at) + .from_writer(file); + Ok(OutputFile::Compressed(writer)) } else { - let header = &[s.id(), delim, std::str::from_utf8(umi).unwrap()].concat(); - output.write(header, s.desc(), s.clone()) + 
Ok(OutputFile::Plain(BufWriter::new(file))) } } From 532ac8999baee01808f249a866bdf648d3cb2d51 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Sat, 13 Apr 2024 20:54:29 +0200 Subject: [PATCH 09/34] Finish the output writers: Integrate with bio::io::fastq::Writer --- src/file_io.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index fa8904e..f640d67 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -1,5 +1,7 @@ use super::umi_errors::RuntimeErrors; use anyhow::{anyhow, Context, Result}; +use bio::io::fastq::Writer as FastqWriter; +use bio::utils::TextSlice; use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; use gzp::{deflate::Gzip, par::compress::Compression, ZBuilder, ZWriter}; @@ -55,22 +57,21 @@ pub fn read_fastq(path: &PathBuf) -> Result), - Compressed(Box), + Plain(FastqWriter), + Compressed(FastqWriter>), } -impl std::io::Write for OutputFile { - fn write(&mut self, buf: &[u8]) -> std::io::Result { +impl OutputFile { + pub fn write_record( + &mut self, + id: &str, + desc: Option<&str>, + seq: TextSlice<'_>, + qual: &[u8], + ) -> std::io::Result<()> { match self { - OutputFile::Plain(writer) => writer.write(buf), - OutputFile::Compressed(writer) => writer.write(buf), - } - } - - fn flush(&mut self) -> std::io::Result<()> { - match self { - OutputFile::Plain(writer) => writer.flush(), - OutputFile::Compressed(writer) => writer.flush(), + OutputFile::Plain(writer) => writer.write(id, desc, seq, qual), + OutputFile::Compressed(writer) => writer.write(id, desc, seq, qual), } } } @@ -90,9 +91,9 @@ pub fn create_writer( .compression_level(Compression::new(compression_level)) .pin_threads(pin_at) .from_writer(file); - Ok(OutputFile::Compressed(writer)) + Ok(OutputFile::Compressed(FastqWriter::from_bufwriter(BufWriter::new(writer)))) } else { - Ok(OutputFile::Plain(BufWriter::new(file))) + Ok(OutputFile::Plain(FastqWriter::new(file))) } } From 
771c52f696c0828abae5077150b63707a7b43fc3 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Sun, 14 Apr 2024 01:17:42 +0200 Subject: [PATCH 10/34] Integrate new multi-threaded writer into the main program. --- src/file_io.rs | 24 ++++++++----------- src/umi_external.rs | 57 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index f640d67..6e028f2 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -1,7 +1,6 @@ use super::umi_errors::RuntimeErrors; use anyhow::{anyhow, Context, Result}; -use bio::io::fastq::Writer as FastqWriter; -use bio::utils::TextSlice; +use bio::io::fastq::{Record, Writer as FastqWriter}; use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; use gzp::{deflate::Gzip, par::compress::Compression, ZBuilder, ZWriter}; @@ -64,31 +63,28 @@ pub enum OutputFile { impl OutputFile { pub fn write_record( &mut self, - id: &str, - desc: Option<&str>, - seq: TextSlice<'_>, - qual: &[u8], + record: Record, ) -> std::io::Result<()> { match self { - OutputFile::Plain(writer) => writer.write(id, desc, seq, qual), - OutputFile::Compressed(writer) => writer.write(id, desc, seq, qual), + OutputFile::Plain(writer) => writer.write(record.id(), record.desc(), record.seq(), record.qual()), + OutputFile::Compressed(writer) => writer.write(record.id(), record.desc(), record.seq(), record.qual()), } } } pub fn create_writer( path: PathBuf, - compress: bool, - num_threads: usize, - compression_level: u32, + compress: &bool, + num_threads: &usize, + compression_level: &Option, pin_at: Option, ) -> Result { let file = File::create(&path) .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(path.clone()))))?; - if compress { + if *compress { let writer = ZBuilder::::new() - .num_threads(num_threads) - .compression_level(Compression::new(compression_level)) + .num_threads(*num_threads) + .compression_level(compression_level.map_or_else(Default::default, 
|l| Compression::new((l as u32).clamp(1, 9)))) .pin_threads(pin_at) .from_writer(file); Ok(OutputFile::Compressed(FastqWriter::from_bufwriter(BufWriter::new(writer)))) diff --git a/src/umi_external.rs b/src/umi_external.rs index 0de0e48..6ec88c4 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -1,7 +1,7 @@ use anyhow::{anyhow, Context, Result}; use clap::Parser; use itertools::izip; -use std::path::PathBuf; +use std::{path::PathBuf, thread}; use super::file_io; use crate::umi_errors::RuntimeErrors; @@ -21,6 +21,27 @@ pub struct OptsExternal { \n " )] gzip: bool, + #[clap( + short = 'l', + long = "compression_level", + help = "Choose the compression level: Maximum 9, defaults to 3. Higher numbers result in smaller files but take longer to compress. + \n ", + )] + compression_level: Option, + #[clap( + short = 't', + long = "threads", + help = "Number of threads to use for processing. Defaults to the number of logical cores available. + \n ", + )] + num_threads: Option, + //#[clap( + // short = 'p', + // long = "pin_threads", + // help = "Pin threads to physical cores. This can provide a significant performance improvement, but has the downside of possibly conflicting with other pinned cores. + // \n " + //)] + // pin_threads: bool, #[clap( short = 'f', long = "force", @@ -72,12 +93,24 @@ pub struct OptsExternal { } pub fn run(args: OptsExternal) -> Result { + // Enables editing id in output file 2 if --edit-nr flag was included let mut edit_nr = false; if args.edit_nr { edit_nr = true; } + // Set the number of threads to max, unless manually specified. In case of failure, use only 1. + let num_threads = args.num_threads.unwrap_or_else(|| { + thread::available_parallelism() + .map(|cores| cores.get()) + .with_context(|| { + format!( + "Failed to determine number of available threads. Please specify manually with --threads." 
+ )}) + .unwrap_or_else(|_| {1}) + }); + // Read FastQ records from input files let r1 = file_io::read_fastq(&args.r1_in) .with_context(|| { @@ -123,8 +156,18 @@ pub fn run(args: OptsExternal) -> Result { println!("Output 1 will be saved to: {}", output1.to_string_lossy()); println!("Output 2 will be saved to: {}", output2.to_string_lossy()); - //let mut write_file_r1 = file_io::output_file(output1, &args.gzip)?; - //let mut write_file_r2 = file_io::output_file(output2, &args.gzip)?; + let mut write_output_r1 = file_io::create_writer( + output1, + &args.gzip, + &num_threads, + &args.compression_level, + None)?; + let mut write_output_r2 = file_io::create_writer( + output2, + &args.gzip, + &num_threads, + &args.compression_level, + None)?; // Record counter let mut counter: i32 = 0; @@ -133,8 +176,8 @@ pub fn run(args: OptsExternal) -> Result { // Iterate over records in input files for (r1_rec_res, ru_rec_res, r2_rec_res) in izip!(r1, ru, r2) { - let mut r1_rec = r1_rec_res?; - let mut r2_rec = r2_rec_res?; + let r1_rec = r1_rec_res?; + let r2_rec = r2_rec_res?; let ru_rec = ru_rec_res?; // Step counter @@ -150,7 +193,7 @@ pub fn run(args: OptsExternal) -> Result { read_nr, )?; - //TODO: Write record to output file + write_output_r1.write_record(r1_rec)?; } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatch)); @@ -166,7 +209,7 @@ pub fn run(args: OptsExternal) -> Result { read_nr, )?; - //TODO: Write record to output file + write_output_r2.write_record(r2_rec)?; } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatch)); From 2c5d250c1e3623aba53fbecd876ca8a236470a79 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Sun, 14 Apr 2024 01:40:47 +0200 Subject: [PATCH 11/34] Directly map output writing error to RuntimeErrors::ReadWriteError --- src/file_io.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 6e028f2..713b466 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -5,7 +5,7 @@ use 
dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; use gzp::{deflate::Gzip, par::compress::Compression, ZBuilder, ZWriter}; use regex::Regex; -use std::{fs, fs::File, io::BufWriter, io::Write, path::Path, path::PathBuf}; +use std::{fs, fs::File, io::BufWriter, path::Path, path::PathBuf}; //////////////////////////////////////////////////////////////// // READ INPUT FILE @@ -64,10 +64,14 @@ impl OutputFile { pub fn write_record( &mut self, record: Record, - ) -> std::io::Result<()> { + ) -> Result<()> { match self { - OutputFile::Plain(writer) => writer.write(record.id(), record.desc(), record.seq(), record.qual()), - OutputFile::Compressed(writer) => writer.write(record.id(), record.desc(), record.seq(), record.qual()), + OutputFile::Plain(writer) => + writer.write(record.id(), record.desc(), record.seq(), record.qual()) + .map_err(|_| anyhow!(RuntimeErrors::ReadWriteError(record))), + OutputFile::Compressed(writer) => + writer.write(record.id(), record.desc(), record.seq(), record.qual()) + .map_err(|_| anyhow!(RuntimeErrors::ReadWriteError(record))), } } } From 6d3c414ccee24b81ca761f7ca728f651ff2f02be Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 25 Apr 2024 15:38:02 +0200 Subject: [PATCH 12/34] Lint code --- Cargo.lock | 4 ++-- src/file_io.rs | 26 +++++++++++------------ src/umi_external.rs | 50 ++++++++++++++++++--------------------------- 3 files changed, 35 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 639e923..8b39ca8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -236,9 +236,9 @@ checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cc" -version = "1.0.92" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" +checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b" [[package]] name = "cfg-if" diff --git a/src/file_io.rs 
b/src/file_io.rs index 713b466..a6228f2 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -49,7 +49,6 @@ pub fn read_fastq(path: &PathBuf) -> Result Result<()> { + pub fn write_record(&mut self, record: Record) -> Result<()> { match self { - OutputFile::Plain(writer) => - writer.write(record.id(), record.desc(), record.seq(), record.qual()) - .map_err(|_| anyhow!(RuntimeErrors::ReadWriteError(record))), - OutputFile::Compressed(writer) => - writer.write(record.id(), record.desc(), record.seq(), record.qual()) - .map_err(|_| anyhow!(RuntimeErrors::ReadWriteError(record))), + OutputFile::Plain(writer) => writer + .write(record.id(), record.desc(), record.seq(), record.qual()) + .map_err(|_| anyhow!(RuntimeErrors::ReadWriteError(record))), + OutputFile::Compressed(writer) => writer + .write(record.id(), record.desc(), record.seq(), record.qual()) + .map_err(|_| anyhow!(RuntimeErrors::ReadWriteError(record))), } } } @@ -88,10 +84,14 @@ pub fn create_writer( if *compress { let writer = ZBuilder::::new() .num_threads(*num_threads) - .compression_level(compression_level.map_or_else(Default::default, |l| Compression::new((l as u32).clamp(1, 9)))) + .compression_level(compression_level.map_or_else(Default::default, |l| { + Compression::new((l as u32).clamp(1, 9)) + })) .pin_threads(pin_at) .from_writer(file); - Ok(OutputFile::Compressed(FastqWriter::from_bufwriter(BufWriter::new(writer)))) + Ok(OutputFile::Compressed(FastqWriter::from_bufwriter( + BufWriter::new(writer), + ))) } else { Ok(OutputFile::Plain(FastqWriter::new(file))) } diff --git a/src/umi_external.rs b/src/umi_external.rs index 6ec88c4..7098483 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -25,14 +25,14 @@ pub struct OptsExternal { short = 'l', long = "compression_level", help = "Choose the compression level: Maximum 9, defaults to 3. Higher numbers result in smaller files but take longer to compress. 
- \n ", + \n " )] compression_level: Option, #[clap( short = 't', long = "threads", help = "Number of threads to use for processing. Defaults to the number of logical cores available. - \n ", + \n " )] num_threads: Option, //#[clap( @@ -93,7 +93,6 @@ pub struct OptsExternal { } pub fn run(args: OptsExternal) -> Result { - // Enables editing id in output file 2 if --edit-nr flag was included let mut edit_nr = false; if args.edit_nr { @@ -157,17 +156,19 @@ pub fn run(args: OptsExternal) -> Result { println!("Output 2 will be saved to: {}", output2.to_string_lossy()); let mut write_output_r1 = file_io::create_writer( - output1, - &args.gzip, - &num_threads, - &args.compression_level, - None)?; + output1, + &args.gzip, + &num_threads, + &args.compression_level, + None, + )?; let mut write_output_r2 = file_io::create_writer( - output2, - &args.gzip, - &num_threads, + output2, + &args.gzip, + &num_threads, &args.compression_level, - None)?; + None, + )?; // Record counter let mut counter: i32 = 0; @@ -186,15 +187,9 @@ pub fn run(args: OptsExternal) -> Result { if r1_rec.id().eq(ru_rec.id()) { // Write to Output file let read_nr = if edit_nr { Some(1) } else { None }; - let r1_rec = update_record( - r1_rec, - ru_rec.seq(), - args.delim.as_ref(), - read_nr, - )?; + let r1_rec = update_record(r1_rec, ru_rec.seq(), args.delim.as_ref(), read_nr)?; write_output_r1.write_record(r1_rec)?; - } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatch)); } @@ -202,15 +197,9 @@ pub fn run(args: OptsExternal) -> Result { if r2_rec.id().eq(ru_rec.id()) { // Write to Output file let read_nr = if edit_nr { Some(2) } else { None }; - let r2_rec = update_record( - r2_rec, - ru_rec.seq(), - args.delim.as_ref(), - read_nr, - )?; + let r2_rec = update_record(r2_rec, ru_rec.seq(), args.delim.as_ref(), read_nr)?; write_output_r2.write_record(r2_rec)?; - } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatch)); } @@ -219,7 +208,6 @@ pub fn run(args: OptsExternal) -> Result { Ok(counter) } - 
// Updates the header and description of the reads accordingly fn update_record( input: bio::io::fastq::Record, @@ -233,11 +221,13 @@ fn update_record( let mut new_desc = String::from(input.desc().unwrap()); new_desc.replace_range(0..1, &number.to_string()); let desc: Option<&str> = Some(&new_desc); - let new_record = bio::io::fastq::Record::with_attrs(new_id, desc, input.seq(), input.qual()); + let new_record = + bio::io::fastq::Record::with_attrs(new_id, desc, input.seq(), input.qual()); Ok(new_record) } else { let new_id = &[input.id(), delim, std::str::from_utf8(umi).unwrap()].concat(); - let new_record = bio::io::fastq::Record::with_attrs(new_id, input.desc(), input.seq(), input.qual()); + let new_record = + bio::io::fastq::Record::with_attrs(new_id, input.desc(), input.seq(), input.qual()); Ok(new_record) } -} \ No newline at end of file +} From 6786745bfe62ecc582c6a66abcbf7448c5b2e333 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 25 Apr 2024 18:40:02 +0200 Subject: [PATCH 13/34] Update Tarpaulin command to include the integration tests as well. --- .github/workflows/testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 7b66fa9..e96ef5e 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -121,7 +121,7 @@ jobs: - uses: actions/upload-artifact@v4 with: name: TarpaulinCodeCoverage.xml - path: cobertura.xml + path: cobertura.xmlgit st From 2a25c00445900b70fba9928b71315a2462e4c585 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 29 Apr 2024 19:29:20 +0200 Subject: [PATCH 14/34] Attempt fixing issues detected by Clippy. 
--- src/file_io.rs | 2 +- src/umi_external.rs | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index a6228f2..8412afe 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -85,7 +85,7 @@ pub fn create_writer( let writer = ZBuilder::::new() .num_threads(*num_threads) .compression_level(compression_level.map_or_else(Default::default, |l| { - Compression::new((l as u32).clamp(1, 9)) + Compression::new((l).clamp(1, 9)) })) .pin_threads(pin_at) .from_writer(file); diff --git a/src/umi_external.rs b/src/umi_external.rs index 7098483..f902c5b 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -103,11 +103,10 @@ pub fn run(args: OptsExternal) -> Result { let num_threads = args.num_threads.unwrap_or_else(|| { thread::available_parallelism() .map(|cores| cores.get()) - .with_context(|| { - format!( + .unwrap_or_else(|_| { + eprintln!( "Failed to determine number of available threads. Please specify manually with --threads." - )}) - .unwrap_or_else(|_| {1}) + ); 1}) }); // Read FastQ records from input files From 64152eaecd193c2948d1937c1c609c06d18d3714 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 29 Apr 2024 19:46:20 +0200 Subject: [PATCH 15/34] Update dependencies to latest. 
--- Cargo.lock | 296 +++++++++++++++++++++++++---------------------------- Cargo.toml | 19 ++-- 2 files changed, 146 insertions(+), 169 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b39ca8..d5d2f28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,9 +67,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" [[package]] name = "approx" @@ -118,9 +118,9 @@ checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "bio" -version = "0.41.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a69423e30444738eccc5e54eccee75779dd3f15ecc0469b95d8529d4b6b7586" +checksum = "7a72cb93babf08c85b375c2938ac678cc637936b3ebb72266d433cec2577f6c2" dependencies = [ "anyhow", "approx", @@ -130,10 +130,10 @@ dependencies = [ "bytecount", "csv", "custom_derive", + "editdistancek", "enum-map", "fxhash", - "getset", - "itertools", + "itertools 0.11.0", "itertools-num", "lazy_static", "multimap", @@ -149,7 +149,7 @@ dependencies = [ "serde_derive", "statrs", "strum", - "strum_macros 0.23.1", + "strum_macros", "thiserror", "triple_accel", "vec_map", @@ -164,7 +164,7 @@ dependencies = [ "derive-new", "lazy_static", "regex", - "strum_macros 0.25.3", + "strum_macros", "thiserror", ] @@ -218,9 +218,15 @@ dependencies = [ [[package]] name = "bytecount" -version = "0.6.7" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" + +[[package]] +name = "bytemuck" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" +checksum = 
"5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" [[package]] name = "byteorder" @@ -277,7 +283,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -399,13 +405,14 @@ dependencies = [ [[package]] name = "dialoguer" -version = "0.10.4" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59c6f2989294b9a498d3ad5491a79c6deb604617378e1cdc4bfc1c1361fe2f87" +checksum = "658bce805d770f407bc62102fca7c2c64ceef2fbcb2b8bd19d2765ce093980de" dependencies = [ "console", "shell-words", "tempfile", + "thiserror", "zeroize", ] @@ -421,11 +428,17 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "editdistancek" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e02df23d5b1c6f9e69fa603b890378123b93073df998a21e6e33b9db0a32613" + [[package]] name = "either" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" [[package]] name = "encode_unicode" @@ -435,22 +448,22 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "enum-map" -version = "1.1.1" +version = "2.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e893a7ba6116821058dec84a6fb14fb2a97cd8ce5fd0f85d5a4e760ecd7329d9" +checksum = "6866f3bfdf8207509a033af1a75a7b08abda06bbaaeae6669323fd5a097df2e9" dependencies = [ "enum-map-derive", ] [[package]] name = "enum-map-derive" -version = "0.6.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84278eae0af6e34ff6c1db44c11634a694aafac559ff3080e4db4e4ac35907aa" 
+checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.60", ] [[package]] @@ -471,9 +484,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "feature-probe" @@ -483,9 +496,9 @@ checksum = "835a3dc7d1ec9e75e2b5fb4ba75396837112d2060b03f7d43bc1897c7f7211da" [[package]] name = "file-format" -version = "0.7.0" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fb46518b6034ba6dbc075ca73277d66cbd488c651c2ccc7255c62b00ce48d24" +checksum = "4ba1b81b3c213cf1c071f8bf3b83531f310df99642e58c48247272eef006cae5" [[package]] name = "fixedbitset" @@ -495,9 +508,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.28" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" dependencies = [ "crc32fast", "libz-sys", @@ -560,18 +573,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "getset" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" -dependencies = [ - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "globset" version = "0.4.14" @@ -615,18 +616,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" - -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "heck" @@ -691,9 +683,18 @@ checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" [[package]] name = "itertools" -version = "0.10.5" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] @@ -779,9 +780,9 @@ checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ "autocfg", "scopeguard", @@ -820,18 +821,18 @@ dependencies = [ [[package]] name = "multimap" -version = "0.8.3" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +checksum = "e1a5d38b9b352dbd913288736af36af41c48d61b1a8cd34bcecd727561b7d511" dependencies = [ "serde", ] [[package]] name = "nalgebra" -version = "0.27.1" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "462fffe4002f4f2e1f6a9dcf12cc1a6fc0e15989014efc02a941d3e0f5dc2120" +checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff" dependencies = [ "approx", "matrixmultiply", @@ -950,9 +951,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "ordered-float" -version = "1.1.1" +version = "3.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" +checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" dependencies = [ "num-traits", ] @@ -999,7 +1000,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1044,44 +1045,20 @@ dependencies = [ "termtree", ] -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -1172,9 +1149,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.32" +version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ "bitflags", "errno", @@ -1195,6 +1172,15 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +[[package]] +name = "safe_arch" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f398075ce1e6a179b46f51bd88d0598b92b00d3551f1a2d4ac49e771b56ac354" +dependencies = [ + "bytemuck", +] + [[package]] name = "same-file" version = "1.0.6" @@ -1218,22 +1204,22 @@ checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" [[package]] name = "serde" -version = "1.0.197" +version = "1.0.199" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "0c9f6e76df036c77cd94996771fb40db98187f096dd0b9af39c6c6e452ba966a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.199" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "11bd257a6541e141e42ca6d24ae26f7714887b47e89aa739099104c7e4d3b7fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1244,14 +1230,15 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "simba" -version = "0.5.1" +version = "0.6.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e82063457853d00243beda9952e910b82593e4b07ae9f721b9278a99a0d3d5c" +checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" dependencies = [ "approx", "num-complex", "num-traits", "paste", + "wide", ] [[package]] @@ -1265,9 +1252,9 @@ dependencies = [ [[package]] name = "statrs" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05bdbb8e4e78216a85785a85d3ec3183144f98d0097b9281802c019bb07a6f05" +checksum = "2d08e5e1748192713cc281da8b16924fb46be7b0c2431854eadc785823e5696e" dependencies = [ "approx", "lazy_static", @@ -1284,22 +1271,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "strum" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" - -[[package]] -name = "strum_macros" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" -dependencies = [ - "heck 0.3.3", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", -] +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" [[package]] name = "strum_macros" @@ -1311,7 +1285,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1337,9 +1311,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.58" +version = "2.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" dependencies = [ "proc-macro2", "quote", @@ -1366,22 +1340,22 @@ checksum = 
"3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1409,8 +1383,7 @@ dependencies = [ "file-format", "flate2", "gzp", - "itertools", - "lazy_static", + "itertools 0.12.1", "owo-colors", "predicates", "regex", @@ -1422,17 +1395,11 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-segmentation" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" - [[package]] name = "unicode-width" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" [[package]] name = "utf8parse" @@ -1455,12 +1422,6 @@ dependencies = [ "serde", ] -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - [[package]] name = "wait-timeout" version = "0.2.0" @@ -1507,7 +1468,7 @@ 
dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "wasm-bindgen-shared", ] @@ -1529,7 +1490,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1540,6 +1501,16 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +[[package]] +name = "wide" +version = "0.7.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81a1851a719f11d1d2fea40e15c72f6c00de8c142d7ac47c1441cc7e4d0d5bc6" +dependencies = [ + "bytemuck", + "safe_arch", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1558,11 +1529,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys", ] [[package]] @@ -1582,13 +1553,14 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", + "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", @@ -1597,45 +1569,51 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" -version = "0.52.4" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "zeroize" diff --git a/Cargo.toml b/Cargo.toml index f4c38c4..7ffda87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,21 +7,20 @@ edition = "2021" [dependencies] clap = { version = "4.3.11", features = ["derive"] } -bio = "0.41.0" -lazy_static = "1.4" +bio = "1.6.0" flate2 = "1.0.24" -itertools = "0.10.5" -file-format = "0.7.0" -anyhow = "1.0.71" -dialoguer = "0.10.4" -regex = "1.8.1" +itertools = "0.12.1" +file-format = "0.24.0" +anyhow = "1.0.82" +dialoguer = "0.11.0" +regex = "1.10.4" owo-colors = { version = "4.0", features = ["supports-colors"] } gzp = "0.11.3" [dev-dependencies] -assert_cmd = "2.0.11" -assert_fs = "1.0.13" -predicates = "3.0.3" +assert_cmd = "2.0.14" +assert_fs = "1.1.1" +predicates = "3.1.0" [workspace.metadata.marker.lints] marker_lints = "0.5.0" From 6e116e23bf55307b3f999aba987a59e3a5603aad Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 29 Apr 2024 20:24:53 +0200 Subject: [PATCH 16/34] After the update, dialoguer doesn't show the prompt anymore, so can't test. 
--- src/file_io.rs | 7 ++++--- src/main.rs | 2 +- tests/integration_tests_external.rs | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 8412afe..ec174ef 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -84,9 +84,10 @@ pub fn create_writer( if *compress { let writer = ZBuilder::::new() .num_threads(*num_threads) - .compression_level(compression_level.map_or_else(Default::default, |l| { - Compression::new((l).clamp(1, 9)) - })) + .compression_level( + compression_level + .map_or_else(Default::default, |l| Compression::new((l).clamp(1, 9))), + ) .pin_threads(pin_at) .from_writer(file); Ok(OutputFile::Compressed(FastqWriter::from_bufwriter( diff --git a/src/main.rs b/src/main.rs index df355a1..d8e0598 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,7 +25,7 @@ https://github.com/SciLifeLab/umi-transfer #[derive(clap::Parser)] #[clap( - version = "1.0.0", + version = "1.5.0dev", author = "Written by Judit Hohenthal, Matthias Zepper & Johannes Alneberg", about = "A tool for transferring Unique Molecular Identifiers (UMIs).", long_about = "Most tools capable of using UMIs to increase the accuracy of quantitative DNA sequencing experiments expect the respective UMI sequence to be embedded into the reads' IDs. You can use `umi-transfer external` to retrieve UMIs from a separate FastQ file and embed them to the IDs of your paired FastQ files." diff --git a/tests/integration_tests_external.rs b/tests/integration_tests_external.rs index 5e185e9..4ee3345 100644 --- a/tests/integration_tests_external.rs +++ b/tests/integration_tests_external.rs @@ -220,9 +220,9 @@ fn external_fails_with_existing_output_file_and_no_force() { cmd.assert() .failure() .stderr(predicate::str::contains("Failed to include the UMIs")) - .stderr(predicate::str::contains("Caused by:")) - .stderr(predicate::str::contains("exists. Overwrite? 
(y/n)")) - .stderr(predicate::str::contains("Not a terminal")); + //.stderr(predicate::str::contains("Caused by:")) + //.stderr(predicate::str::contains("exists. Overwrite? (y/n)")) + .stderr(predicate::str::contains("not a terminal")); temp_dir .child("read2_out.fq") From 1a0df50954f49647e7917048438bd05aae462bc7 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 2 May 2024 22:03:42 +0200 Subject: [PATCH 17/34] Fixed the integration test that relies on an interactive prompt with the rexpect crate. --- .github/workflows/testing.yml | 7 ++-- Cargo.lock | 59 ++++++++++++++++++++++++++++- Cargo.toml | 1 + tests/integration_tests_external.rs | 27 ++++++++----- 4 files changed, 80 insertions(+), 14 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index e96ef5e..62f2e23 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -108,8 +108,9 @@ jobs: - name: Setup Rust uses: dtolnay/rust-toolchain@stable - with: - components: tarpaulin + + - name: Install tarpaulin + run: cargo install cargo-tarpaulin - name: Cache Rust toolchain uses: Swatinem/rust-cache@23bce251a8cd2ffc3c1075eaa2367cf899916d84 # v2 @@ -121,7 +122,7 @@ jobs: - uses: actions/upload-artifact@v4 with: name: TarpaulinCodeCoverage.xml - path: cobertura.xmlgit st + path: cobertura.xml diff --git a/Cargo.lock b/Cargo.lock index d5d2f28..0d9af00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -183,6 +183,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.5.0" @@ -307,6 +313,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "comma" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335" + [[package]] name = "console" version = "0.15.8" @@ -592,7 +604,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" dependencies = [ - "bitflags", + "bitflags 2.5.0", "ignore", "walkdir", ] @@ -810,6 +822,15 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "miniz_oxide" version = "0.7.2" @@ -888,6 +909,20 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset", + "pin-utils", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -1003,6 +1038,12 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.30" @@ -1138,6 +1179,19 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +[[package]] +name = "rexpect" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "01ff60778f96fb5a48adbe421d21bf6578ed58c0872d712e7e08593c195adff8" +dependencies = [ + "comma", + "nix", + "regex", + "tempfile", + "thiserror", +] + [[package]] name = "rustc_version" version = "0.1.7" @@ -1153,7 +1207,7 @@ version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ - "bitflags", + "bitflags 2.5.0", "errno", "libc", "linux-raw-sys", @@ -1387,6 +1441,7 @@ dependencies = [ "owo-colors", "predicates", "regex", + "rexpect", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 7ffda87..578272c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ gzp = "0.11.3" assert_cmd = "2.0.14" assert_fs = "1.1.1" predicates = "3.1.0" +rexpect = "0.5.0" [workspace.metadata.marker.lints] marker_lints = "0.5.0" diff --git a/tests/integration_tests_external.rs b/tests/integration_tests_external.rs index 4ee3345..ad36362 100644 --- a/tests/integration_tests_external.rs +++ b/tests/integration_tests_external.rs @@ -1,6 +1,9 @@ use assert_cmd::Command; use assert_fs::prelude::*; use predicates::prelude::*; +use std::process::Command as StdCommand; + +extern crate rexpect; #[path = "auxiliary.rs"] mod auxiliary; @@ -196,7 +199,7 @@ fn external_fails_with_nonexisting_output_file() { #[test] fn external_fails_with_existing_output_file_and_no_force() { - let (mut cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); + let (_cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); // create an existing output file temp_dir @@ -204,6 +207,12 @@ fn external_fails_with_existing_output_file_and_no_force() { .write_str("GCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATA") .unwrap(); + // This test comprises an interactive prompt, which is not supported by assert_cmd. 
+ // Therefore, we use rexpect to run the test in a session and must use + // a different Command type: std::process::Command instead of assert_cmd::Command. + + let bin_path = assert_cmd::cargo::cargo_bin("umi-transfer"); + let mut cmd = StdCommand::new(bin_path); cmd.arg("external") .arg("--in") .arg(test_files.read1_gz) @@ -214,15 +223,15 @@ fn external_fails_with_existing_output_file_and_no_force() { .arg("--out") .arg(test_files.new_output_read1_gz) .arg("--out2") - .arg(test_files.new_output_read2_gz) - .write_stdin("yes\n".as_bytes()); + .arg(test_files.new_output_read2_gz); - cmd.assert() - .failure() - .stderr(predicate::str::contains("Failed to include the UMIs")) - //.stderr(predicate::str::contains("Caused by:")) - //.stderr(predicate::str::contains("exists. Overwrite? (y/n)")) - .stderr(predicate::str::contains("not a terminal")); + // Evaluate that the prompt is shown, but do not overwrite the existing file. + + let mut p = rexpect::session::spawn_command(cmd, Some(10000)).unwrap(); + p.exp_string("read1_out.fq exists. Overwrite?").unwrap(); + p.send_line("n").unwrap(); + p.exp_string("read1_out.fq exists, but must not be overwritten.") + .unwrap(); temp_dir .child("read2_out.fq") From 4d7814e738938cf8943eec9cc6bfb6f47ab7f755 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 6 May 2024 19:20:38 +0200 Subject: [PATCH 18/34] Add Codecov upload. 
--- .github/workflows/testing.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 62f2e23..9c08743 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -123,6 +123,11 @@ jobs: with: name: TarpaulinCodeCoverage.xml path: cobertura.xml + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} From 075d487a7fa9c8848d1bb0aad817a6ef8e86100b Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 6 May 2024 20:20:15 +0200 Subject: [PATCH 19/34] Add shield.io badges to README. --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 462f53b..55e6d75 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,13 @@
+[![License:MIT](https://img.shields.io/badge/License-MIT-491f53.svg)](https://opensource.org/licenses/MIT) +![GitHub Actions Tests](https://img.shields.io/github/actions/workflow/status/SciLifeLab/umi-transfer/.github%2Fworkflows%2Ftesting.yml?branch=dev&logo=github&label=Tests&color=%23a7c947) +[![codecov](https://codecov.io/gh/SciLifeLab/umi-transfer/branch/dev/graph/badge.svg)](https://codecov.io/gh/SciLifeLab/umi-transfer) +![GitHub Actions Build](https://img.shields.io/github/actions/workflow/status/SciLifeLab/umi-transfer/.github%2Fworkflows%2Frelease.yml?branch=dev&label=Binary%20builds&logo=github&color=%23a7c947) +[![GitHub Actions Build](https://img.shields.io/github/actions/workflow/status/SciLifeLab/umi-transfer/.github%2Fworkflows%2Fcontainer.yml?branch=dev&label=Docker%20builds&logo=docker&color=%23a7c947)](https://hub.docker.com/r/mzscilifelab/umi-transfer) +[![install with Bioconda](https://img.shields.io/badge/Available%20via-Bioconda-045c64.svg)](https://bioconda.github.io/recipes/umi-transfer/README.html) + ## Background To increase the accuracy of quantitative DNA sequencing experiments, Unique Molecular Identifiers may be used. UMIs are short sequences used to uniquely tag each molecule in a sample library, enabling precise identification of read duplicates. They must be added during library preparation and prior to sequencing, therefore require appropriate arrangements with your sequencing provider. From 56442e4bc0b99ba72bacc8458a761a32ea0b0153 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 7 May 2024 15:03:42 +0200 Subject: [PATCH 20/34] If multiple output files need to be written, I obviously can't use the max_threads for each. --- README.md | 8 +++---- src/auxiliary.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++- src/umi_external.rs | 17 +++++++------- 3 files changed, 67 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 55e6d75..8292e3c 100644 --- a/README.md +++ b/README.md @@ -17,12 +17,12 @@
-[![License:MIT](https://img.shields.io/badge/License-MIT-491f53.svg)](https://opensource.org/licenses/MIT) +[![License: MIT](https://img.shields.io/badge/License-MIT-491f53.svg)](https://opensource.org/licenses/MIT) ![GitHub Actions Tests](https://img.shields.io/github/actions/workflow/status/SciLifeLab/umi-transfer/.github%2Fworkflows%2Ftesting.yml?branch=dev&logo=github&label=Tests&color=%23a7c947) [![codecov](https://codecov.io/gh/SciLifeLab/umi-transfer/branch/dev/graph/badge.svg)](https://codecov.io/gh/SciLifeLab/umi-transfer) -![GitHub Actions Build](https://img.shields.io/github/actions/workflow/status/SciLifeLab/umi-transfer/.github%2Fworkflows%2Frelease.yml?branch=dev&label=Binary%20builds&logo=github&color=%23a7c947) -[![GitHub Actions Build](https://img.shields.io/github/actions/workflow/status/SciLifeLab/umi-transfer/.github%2Fworkflows%2Fcontainer.yml?branch=dev&label=Docker%20builds&logo=docker&color=%23a7c947)](https://hub.docker.com/r/mzscilifelab/umi-transfer) -[![install with Bioconda](https://img.shields.io/badge/Available%20via-Bioconda-045c64.svg)](https://bioconda.github.io/recipes/umi-transfer/README.html) +[![Build status](https://img.shields.io/github/actions/workflow/status/SciLifeLab/umi-transfer/.github%2Fworkflows%2Frelease.yml?branch=dev&label=Binary%20builds&logo=github&color=%23a7c947)](https://github.com/SciLifeLab/umi-transfer/releases/latest) +[![Docker container status](https://img.shields.io/github/actions/workflow/status/SciLifeLab/umi-transfer/.github%2Fworkflows%2Fcontainer.yml?branch=dev&label=Docker%20builds&logo=docker&color=%23a7c947)](https://hub.docker.com/r/mzscilifelab/umi-transfer) +[![Install with Bioconda](https://img.shields.io/badge/Available%20via-Bioconda-045c64.svg)](https://bioconda.github.io/recipes/umi-transfer/README.html) ## Background diff --git a/src/auxiliary.rs b/src/auxiliary.rs index 81d9099..a1a1d9e 100644 --- a/src/auxiliary.rs +++ b/src/auxiliary.rs @@ -1,4 +1,4 @@ -use std::time::Instant; +use 
std::{thread,time::Instant}; pub fn timedrun(msg: &str, func: F) -> R where @@ -9,3 +9,57 @@ where println!("{msg} after {:.1} seconds", start.elapsed().as_secs_f32()); measure } + + +pub fn threads_available() -> usize { + thread::available_parallelism() + .map(|cores| cores.get()) + .unwrap_or_else(|_| { + eprintln!( + "Failed to determine number of available threads. Please specify manually with --threads." + ); 1}) +} + +pub fn threads_per_task(available_threads: usize, num_tasks: usize) -> usize { + if available_threads <= 1 || available_threads <= num_tasks { + 1 + } else { + // Subtract 1 for the main thread + let threads_for_tasks = available_threads - 1; + // The result is already always rounded down towards zero for integer divisions using the / operator. + let threads_per_task = threads_for_tasks / num_tasks; + threads_per_task.max(1) + } +} + + + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn test_threads_available_returns_positive_number() { + let threads = threads_available(); + assert!(threads > 0); + } + + #[test] + fn test_threads_per_task_never_returns_less_than_one() { + let threads_per_task = threads_per_task(1,3); + assert!(threads_per_task == 1); + } + + #[test] + fn test_threads_per_task_splits_even_threads_correctly() { + let threads_per_task = threads_per_task(8,3); + assert!(threads_per_task == 2); + } + + #[test] + fn test_threads_per_task_splits_odd_threads_correctly() { + let threads_per_task = threads_per_task(10,3); + assert!(threads_per_task == 3); + } +} diff --git a/src/umi_external.rs b/src/umi_external.rs index f902c5b..8c43e61 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -1,9 +1,10 @@ use anyhow::{anyhow, Context, Result}; use clap::Parser; use itertools::izip; -use std::{path::PathBuf, thread}; +use std::path::PathBuf; use super::file_io; +use crate::auxiliary::{threads_available,threads_per_task}; use crate::umi_errors::RuntimeErrors; #[derive(Debug, Parser)] pub struct OptsExternal { @@ -101,14 
+102,12 @@ pub fn run(args: OptsExternal) -> Result { // Set the number of threads to max, unless manually specified. In case of failure, use only 1. let num_threads = args.num_threads.unwrap_or_else(|| { - thread::available_parallelism() - .map(|cores| cores.get()) - .unwrap_or_else(|_| { - eprintln!( - "Failed to determine number of available threads. Please specify manually with --threads." - ); 1}) + threads_available() }); + // Determine the number of threads available for output file compression. + let threads_per_task = threads_per_task(num_threads, 2); + // Read FastQ records from input files let r1 = file_io::read_fastq(&args.r1_in) .with_context(|| { @@ -157,14 +156,14 @@ pub fn run(args: OptsExternal) -> Result { let mut write_output_r1 = file_io::create_writer( output1, &args.gzip, - &num_threads, + &threads_per_task, &args.compression_level, None, )?; let mut write_output_r2 = file_io::create_writer( output2, &args.gzip, - &num_threads, + &threads_per_task, &args.compression_level, None, )?; From 764fe3819838f5d8a20d722eed43b6817f525949 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 22 May 2024 15:39:20 +0200 Subject: [PATCH 21/34] Alias bio::io::fastq::Reader as FastqReader analogous to FastqWriter. 
--- src/auxiliary.rs | 15 ++++++--------- src/file_io.rs | 4 ++-- src/umi_external.rs | 6 ++---- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/auxiliary.rs b/src/auxiliary.rs index a1a1d9e..5fdeb11 100644 --- a/src/auxiliary.rs +++ b/src/auxiliary.rs @@ -1,4 +1,4 @@ -use std::{thread,time::Instant}; +use std::{thread, time::Instant}; pub fn timedrun(msg: &str, func: F) -> R where @@ -10,7 +10,6 @@ where measure } - pub fn threads_available() -> usize { thread::available_parallelism() .map(|cores| cores.get()) @@ -25,20 +24,18 @@ pub fn threads_per_task(available_threads: usize, num_tasks: usize) -> usize { 1 } else { // Subtract 1 for the main thread - let threads_for_tasks = available_threads - 1; + let threads_for_tasks = available_threads - 1; // The result is already always rounded down towards zero for integer divisions using the / operator. let threads_per_task = threads_for_tasks / num_tasks; threads_per_task.max(1) } } - - #[cfg(test)] mod tests { use super::*; - + #[test] fn test_threads_available_returns_positive_number() { let threads = threads_available(); @@ -47,19 +44,19 @@ mod tests { #[test] fn test_threads_per_task_never_returns_less_than_one() { - let threads_per_task = threads_per_task(1,3); + let threads_per_task = threads_per_task(1, 3); assert!(threads_per_task == 1); } #[test] fn test_threads_per_task_splits_even_threads_correctly() { - let threads_per_task = threads_per_task(8,3); + let threads_per_task = threads_per_task(8, 3); assert!(threads_per_task == 2); } #[test] fn test_threads_per_task_splits_odd_threads_correctly() { - let threads_per_task = threads_per_task(10,3); + let threads_per_task = threads_per_task(10, 3); assert!(threads_per_task == 3); } } diff --git a/src/file_io.rs b/src/file_io.rs index ec174ef..1a528da 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -1,6 +1,6 @@ use super::umi_errors::RuntimeErrors; use anyhow::{anyhow, Context, Result}; -use bio::io::fastq::{Record, Writer as FastqWriter}; 
+use bio::io::fastq::{Reader as FastqReader, Record, Writer as FastqWriter}; use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; use gzp::{deflate::Gzip, par::compress::Compression, ZBuilder, ZWriter}; @@ -46,7 +46,7 @@ pub fn read_fastq(path: &PathBuf) -> Result Result { } // Set the number of threads to max, unless manually specified. In case of failure, use only 1. - let num_threads = args.num_threads.unwrap_or_else(|| { - threads_available() - }); + let num_threads = args.num_threads.unwrap_or_else(|| threads_available()); // Determine the number of threads available for output file compression. let threads_per_task = threads_per_task(num_threads, 2); From 8bffbced3c4717285888c4eeb204cff0efeb8621 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 22 May 2024 19:32:15 +0200 Subject: [PATCH 22/34] Prepare the addition of binary test files to tests. --- tests/auxiliary.rs | 29 +++++++++++++++++++ ...integration_tests_external_filecontents.rs | 29 ++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs index c6720b3..b889524 100644 --- a/tests/auxiliary.rs +++ b/tests/auxiliary.rs @@ -4,6 +4,7 @@ use assert_fs::fixture::{NamedTempFile, TempDir}; use assert_fs::prelude::*; use predicates::prelude::*; use std::path::PathBuf; +use std::io::Read; // since those are just needed for the tests, I didn't put it in src. Therefore, using this module is not detected and dead_code warnings issued. #[derive()] @@ -124,3 +125,31 @@ pub fn verify_file_contents(test_file: &PathBuf, reference_file: &PathBuf) -> Re )) } } + + +// Function to compare two files, used to test if the program output matches the reference. 
+#[allow(dead_code)] +pub fn verify_file_binary(test_file: &PathBuf, reference_file: &PathBuf) -> Result { + + let mut test_file_buf: Vec = Vec::new(); + let mut reference_file_buf: Vec = Vec::new(); + + let mut test_file_handle = std::fs::File::open(&test_file) + .map_err(|err| anyhow!("Failed to read test file: {}", err))?; + let mut reference_file_handle = std::fs::File::open(&reference_file) + .map_err(|err| anyhow!("Failed to read reference file: {}", err))?; + + test_file_handle.read_to_end(&mut test_file_buf)?; + reference_file_handle.read_to_end(&mut reference_file_buf)?; + + if test_file_buf == reference_file_buf { + Ok(true) + } else { + Err(anyhow!( + "{} and {} did not match!", + reference_file.file_name().unwrap().to_string_lossy(), + test_file.file_name().unwrap().to_string_lossy() + )) + } +} + diff --git a/tests/integration_tests_external_filecontents.rs b/tests/integration_tests_external_filecontents.rs index 8ba514a..8178a8c 100644 --- a/tests/integration_tests_external_filecontents.rs +++ b/tests/integration_tests_external_filecontents.rs @@ -1,5 +1,5 @@ use assert_fs::prelude::*; -use auxiliary::verify_file_contents; +use auxiliary::{verify_file_binary,verify_file_contents}; use predicates::prelude::*; use std::error::Error; @@ -33,6 +33,33 @@ fn testing_file_verification_fails() { } // Yep, verify_file_contents() does its job. Ready to rumble! +// Do the same for binary files. 
+ +#[test] +fn testing_file_comparison_succeeds() -> TestResult { + let (mut _cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); + + // the same file should be identical + verify_file_binary(&test_files.read1, &test_files.read1)?; + + temp_dir.close()?; + Ok(()) +} + +#[test] +#[should_panic(expected = "read2.fq and read1.fq did not match!")] +fn testing_file_comparison_fails() { + let (mut _cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); + + // two different files must not compare as identical + verify_file_binary(&test_files.read1, &test_files.read2).unwrap(); + + temp_dir.close().unwrap(); +} + +// Yep, verify_file_binary() does its job too. Ready to rumble! + + #[test] fn external_produces_correct_output() -> TestResult { From 8d00d87c4ef4acdb3a23837cd11fc5226bbf424d Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 22 May 2024 20:07:14 +0200 Subject: [PATCH 23/34] Test for compressed output files as well. --- tests/auxiliary.rs | 6 ++- ...integration_tests_external_filecontents.rs | 38 ++++++++++++++++++ tests/results/correct_read1.fq.gz | Bin 0 -> 647 bytes tests/results/correct_read2.fq.gz | Bin 0 -> 682 bytes 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs index b889524..54cd09b 100644 --- a/tests/auxiliary.rs +++ b/tests/auxiliary.rs @@ -32,6 +32,8 @@ pub struct TestOutput { // Struct to hold the paths to validated output files. 
pub correct_read1: PathBuf, pub correct_read2: PathBuf, + pub compressed_correct_read1: PathBuf, + pub compressed_correct_read2: PathBuf, pub corrected_read1: PathBuf, pub corrected_read2: PathBuf, pub delim_underscore_read1: PathBuf, @@ -65,7 +67,7 @@ pub fn setup_integration_test( std::env::current_dir() .expect("Failed to get directory") .join("./tests/results"), - &["*.fq"], + &["*.fq", "*.gz"], ) .expect("Failed to copy result data to temporary directory."); }; @@ -90,6 +92,8 @@ pub fn setup_integration_test( let temp = TestOutput { correct_read1: temp_dir.path().join("correct_read1.fq"), correct_read2: temp_dir.path().join("correct_read2.fq"), + compressed_correct_read1: temp_dir.path().join("correct_read1.fq.gz"), + compressed_correct_read2: temp_dir.path().join("correct_read2.fq.gz"), corrected_read1: temp_dir.path().join("corrected_read1.fq"), corrected_read2: temp_dir.path().join("corrected_read2.fq"), delim_underscore_read1: temp_dir.path().join("delim_underscore_read1.fq"), diff --git a/tests/integration_tests_external_filecontents.rs b/tests/integration_tests_external_filecontents.rs index 8178a8c..39bb68b 100644 --- a/tests/integration_tests_external_filecontents.rs +++ b/tests/integration_tests_external_filecontents.rs @@ -213,3 +213,41 @@ fn external_switch_umi_and_read2() -> TestResult { temp_dir.close()?; Ok(()) } + +#[test] +fn external_produces_correct_compressed_output() -> TestResult { + let (mut cmd, temp_dir, test_files, test_output) = auxiliary::setup_integration_test(true); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1) + .arg("--in2") + .arg(test_files.read2) + .arg("--umi") + .arg(test_files.umi) + .arg("--gzip"); + + cmd.assert().success(); //further assertions have been tested in other tests + + temp_dir + .child("read1_with_UMIs.fq.gz") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_with_UMIs.fq.gz") + .assert(predicate::path::exists()); + + let reference = test_output.unwrap(); + + 
verify_file_binary( + &temp_dir.child("read1_with_UMIs.fq.gz").to_path_buf(), + &reference.compressed_correct_read1, + )?; + + verify_file_binary( + &temp_dir.child("read2_with_UMIs.fq.gz").to_path_buf(), + &reference.compressed_correct_read2, + )?; + + temp_dir.close()?; + Ok(()) +} \ No newline at end of file diff --git a/tests/results/correct_read1.fq.gz b/tests/results/correct_read1.fq.gz new file mode 100644 index 0000000000000000000000000000000000000000..cfbfbacce36c207c2a45fa61ce2eb42a7d95fc81 GIT binary patch literal 647 zcmV;20(ku&iwFP!00000|IL=cP8&fGMDPBJNI7!Z@!AN_iB2mnArUV0|9?yLs_jTQ zutW|>SoX3W&kX7}Rn`0WnorM9=bz8$$1?6`W+@bz z-^P5I%T!inNUFOCsY0pRMN}$zhpHli@hWBbK2=g?Rh0OhSy9SNuVYOr8TU8wr+6_? zm0l(7?sn(z=g)ub9=GxAkH?46ct8`S*v119K?6iEfQo{}aEUT5Rd65a9TFPg^N36| zR4h=5)pc2D(2D$6Jo9Ee>sXfi#zU!;yp2c2X=j-PBY*UQCld;fzqn5EP_JWk+Q8ShY>*p4{i*`1Z^(N z%72Xd&+M6RkIcWnzKdu7uh<>5vX~1ILXBu6b*xIZf1-qi5kk&v%X!!C1;pvx=Np zo$qXnp1^byowp&}PUmnSnAb5a1%Y)% zj>A}db*lS6(#ad?oIjq#-3ULnZEaX?_Zv1d8?!SD008B6JRtx8 literal 0 HcmV?d00001 diff --git a/tests/results/correct_read2.fq.gz b/tests/results/correct_read2.fq.gz new file mode 100644 index 0000000000000000000000000000000000000000..4dcaf7638cd3bf1148dcfd2d26b0b9ff0e54600e GIT binary patch literal 682 zcmV;b0#*GViwFP!00000|IL=oP9rf8gztHZy|agLCW#WC=xr@6q%0Tu{hwv^RXd0c zFay~OM#}ha9Q?Veocm{gJU>4CKHIOppQip4kI#Aj(_3$CYQ3FKr(JJTJ9PI93v>T5 z_oqJf@JzRe$UC!$wD8D{rCXYrdAO&iS$afzFxQ=aEfYykP1Fk-b6yWOrtp2~G*TZL z)4Ca(yUoMRrN3$PxBXQ%_se*;`~9&M9?Ni9|y^YrC(M0OKla;_Bd}Zc<_RSEt@OYYY;3> zAS*#&0x0<@6B&`c;-oMFA<|Li@LBx|_e&5(a6H6Xd?uRv*N6Mh>yFiEp5?8%^&yE| z77ts6j4mmhMB(xxiujXXl8q{fsMWQ`T0pE|uPaSb8h+y20fyTCp>tyb^~H99TBWnw z5zKSzY+8}9oII+^11Lrpoy4G9`z3hA1c6*`BB?~K;^}&kRN_-nb+MLMewXCgZ+EAn zqf|y3D|9pmDaal5q~;VUK%7VV7$7QDq#15|7F9l18b|4m{GyZb+v)V7iqB16qtm9{ zd>q!&>BU;`U?Zh!ox3)vAB7w}x~j+_m81y`$c~yN+c8>CxeO219mfPv-GZkREZ4sW QS8TcY3*-h8Ml%Zl0J8f>VgLXD literal 0 HcmV?d00001 From f774bf1320082518114101523be5707df4775bcd Mon Sep 17 00:00:00 2001 From: Matthias 
Zepper Date: Wed, 22 May 2024 20:24:16 +0200 Subject: [PATCH 24/34] Create test for compression level setting. --- tests/auxiliary.rs | 4 + ...integration_tests_external_filecontents.rs | 80 ++++++++++++++++++ tests/results/correct_read1_lvl9.fq.gz | Bin 0 -> 650 bytes tests/results/correct_read2_lvl9.fq.gz | Bin 0 -> 679 bytes 4 files changed, 84 insertions(+) create mode 100644 tests/results/correct_read1_lvl9.fq.gz create mode 100644 tests/results/correct_read2_lvl9.fq.gz diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs index 54cd09b..1a39d48 100644 --- a/tests/auxiliary.rs +++ b/tests/auxiliary.rs @@ -34,6 +34,8 @@ pub struct TestOutput { pub correct_read2: PathBuf, pub compressed_correct_read1: PathBuf, pub compressed_correct_read2: PathBuf, + pub more_compressed_correct_read1: PathBuf, + pub more_compressed_correct_read2: PathBuf, pub corrected_read1: PathBuf, pub corrected_read2: PathBuf, pub delim_underscore_read1: PathBuf, @@ -94,6 +96,8 @@ pub fn setup_integration_test( correct_read2: temp_dir.path().join("correct_read2.fq"), compressed_correct_read1: temp_dir.path().join("correct_read1.fq.gz"), compressed_correct_read2: temp_dir.path().join("correct_read2.fq.gz"), + more_compressed_correct_read1: temp_dir.path().join("correct_read1_lvl9.fq.gz"), + more_compressed_correct_read2: temp_dir.path().join("correct_read2_lvl9.fq.gz"), corrected_read1: temp_dir.path().join("corrected_read1.fq"), corrected_read2: temp_dir.path().join("corrected_read2.fq"), delim_underscore_read1: temp_dir.path().join("delim_underscore_read1.fq"), diff --git a/tests/integration_tests_external_filecontents.rs b/tests/integration_tests_external_filecontents.rs index 39bb68b..ef2d170 100644 --- a/tests/integration_tests_external_filecontents.rs +++ b/tests/integration_tests_external_filecontents.rs @@ -248,6 +248,86 @@ fn external_produces_correct_compressed_output() -> TestResult { &reference.compressed_correct_read2, )?; + temp_dir.close()?; + Ok(()) +} + +#[test] +fn 
external_produces_correct_compressed_output_mod_compression_level() -> TestResult { + let (mut cmd, temp_dir, test_files, test_output) = auxiliary::setup_integration_test(true); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1) + .arg("--in2") + .arg(test_files.read2) + .arg("--umi") + .arg(test_files.umi) + .arg("--compression_level") + .arg("9") + .arg("--gzip"); + + cmd.assert().success(); //further assertions have been tested in other tests + + temp_dir + .child("read1_with_UMIs.fq.gz") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_with_UMIs.fq.gz") + .assert(predicate::path::exists()); + + let reference = test_output.unwrap(); + + verify_file_binary( + &temp_dir.child("read1_with_UMIs.fq.gz").to_path_buf(), + &reference.more_compressed_correct_read1, + )?; + + verify_file_binary( + &temp_dir.child("read2_with_UMIs.fq.gz").to_path_buf(), + &reference.more_compressed_correct_read2, + )?; + + temp_dir.close()?; + Ok(()) +} + +#[test] +fn external_produces_correct_compressed_output_thread_limit() -> TestResult { + let (mut cmd, temp_dir, test_files, test_output) = auxiliary::setup_integration_test(true); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1) + .arg("--in2") + .arg(test_files.read2) + .arg("--umi") + .arg(test_files.umi) + .arg("--threads") + .arg("3") + .arg("--gzip"); + + cmd.assert().success(); //further assertions have been tested in other tests + + temp_dir + .child("read1_with_UMIs.fq.gz") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_with_UMIs.fq.gz") + .assert(predicate::path::exists()); + + let reference = test_output.unwrap(); + + verify_file_binary( + &temp_dir.child("read1_with_UMIs.fq.gz").to_path_buf(), + &reference.compressed_correct_read1, + )?; + + verify_file_binary( + &temp_dir.child("read2_with_UMIs.fq.gz").to_path_buf(), + &reference.compressed_correct_read2, + )?; + temp_dir.close()?; Ok(()) } \ No newline at end of file diff --git 
a/tests/results/correct_read1_lvl9.fq.gz b/tests/results/correct_read1_lvl9.fq.gz new file mode 100644 index 0000000000000000000000000000000000000000..91af50ee93816267cc6c2593f4002ac97f44942e GIT binary patch literal 650 zcmV;50(Jc#iwFP!00002|IL=oZsRZvgztWe0(nyqr5F?OfTa=?u!=Yb3{BpgVe_!pj9s0h#$K@lhZ*6G9&{wl8%iOAWZ0;Er=KeIa z_tv-YOt*;0XRA|Mcw}dm9*&~)d^?tJP6qDbIPCxsMm943Vn z$BF|oL`g4_SrG!T`Ug;Cz2-sGGY!YF7apv!HrQhyPcR9%|41%T(ho}AhPWyU!6VG^>#tn~C;C4Uc1NKD_N)9z5X=J_Z(ltL{jZfHl@;4~^%nPF~mc`V~B>nzyD zNLKC;dq|6#V}>Dk%T(6?kSrg?c@E>ehz{3+?YpHNErJ@NZfMd_9-LRu_^Bh8b%jzz z>#)=$%0lc1j6G#j!hUx;?ceAO{XCu4)WSmXnYNE-D@!a>mg~1*%8cnVEa*;?L8h?4 k8$B&(Ek@IUc9?w5%akFh5dLi0+DWmI!voi|-02O{dv;Y7A literal 0 HcmV?d00001 diff --git a/tests/results/correct_read2_lvl9.fq.gz b/tests/results/correct_read2_lvl9.fq.gz new file mode 100644 index 0000000000000000000000000000000000000000..86bcea9605daba67557196085aad43e2a2dd36cf GIT binary patch literal 679 zcmV;Y0$BYYiwFP!00002|IL<5ZsR}L_VGfpWWnYt9AZou@MII-|185QwgOB% zk!&Z62!THRi}+Mk?Uy$@U(ToB7yTvsu9H_kzs2Q`^wRfTo{q=kD0w%DS-85IJq_|I zow!Aqx_dlpUP9eGHkxgrd6?$}%iFN*%&r`3hITHK=WMBgoWMR)4pR)l|I2y#5InJu z>xOThH>ZaWxq6W6_Bu8%%XoJC{h=>BK!f*Q!J{aQXn@6AWf+hc@?@wsk_4$_Pu>S+ zlS&(wE!h;nmWYA1Ez*d83C|tJ8lIu=x~cHw8{^Zh;(-kKp&fe25;2OO-Ff3Im*OZf z<%}vGeYi)9I&Vk@D_L8FdTV&(8vw{>vbqt^wCf5F3SyOuc;E-8duSkv2UXEGoRRV% zH9S)|QN7xX2U%e{#Y+2mUEsAOhsU3zp6NI=b0Gxqu8Vk3D(Xf3#1n)?MbRk~1$~7q z;}YgAdtgW_q@lGU(KsGUmPWq7^WVS_`8-J06(sK$xe?FqFzjYL@It=kvbh|!x-(J? zbRL#~R%b$`w``vgfI#wTj zl@7-k_v`xZZ8J9SOaX?w@uH9CDa4u@tf zem!3c9<8{Uzq?fq@yA_lKovPgw;AQrf{LSPJM`Ao>K6dFdyqAn@u6)?5G)^m53bm9 N^B3d>5=Ju%007C)NlE|! literal 0 HcmV?d00001 From ec8852cf2d2b68b0215730b4071f4d8163361d28 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 22 May 2024 21:22:45 +0200 Subject: [PATCH 25/34] Readme updates to accomodate the new multi-threaded output compression. 
--- README.md | 122 ++++++++++++++++++++------------------------ src/umi_external.rs | 2 +- 2 files changed, 57 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 8292e3c..728ddc1 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ - [Background on Unique Molecular Identifiers](#background) - [Installing `umi-transfer`](#installation) - [Using `umi-transfer` to integrate UMIs](#usage) -- [Improving performance with external multi-threaded compression](#high-performance-guide) +- [Chaining with other software](#chaining-with-other-software) - [Contributing bugfixes and new features](#contribution-guide-for-developers)
@@ -94,132 +94,122 @@ That should create an executable `target/release/umi-transfer` that can be place ```shell ./target/release/umi-transfer --version -umi-transfer 1.0.0 +umi-transfer 1.5.0 ``` ## Usage ->### Performance Note -> ->The decompression and compression used within umi-transfer is single-threaded, so to get the most reads per minute performance, see the [high performance guide](#high-performance-guide) +The tool requires three FastQ files as input. You can manually specify the names and location of the output files with `--out` and `--out2` or the tool will automatically append a `with_UMI` suffix to your input file names. It additionally accepts to choose a custom UMI delimiter with `--delim` and to set the flags `-f`, `-c` and `-z`. -The tool requires three FastQ files as input. You can manually specify the names and location of the output files with `--out` and `--out2` or the tool will append a `with_UMI` suffix to your input file names as output. It additionally accepts to choose a custom UMI delimiter with `--delim` and to set the flags `-f`, `-c` and `-z`. - -`-c` is used to ensure the canonical `1` and `2` of paired files as read numbers in the output, regardless of the read numbers of the input reads. `-f` / `--force` will overwrite existing output files without prompting the user and `-c` enables the internal single-threaded compression of the output files. Alternatively, you can also specify an output file name with `.gz` suffix to obtain compressed output. +`-c` is used to ensure the canonical `1` and `2` of paired files as read numbers in the output, regardless of the read numbers of the input reads. `-f` / `--force` will overwrite existing output files without prompting the user and `-z` enables the internal compression of the output files. Alternatively, you can also specify an output file name with `.gz` suffix to obtain compressed output. 
```raw $ umi-transfer external --help - umi-transfer-external + + Integrate UMIs from a separate FastQ file -USAGE: - umi-transfer external [OPTIONS] --in --in2 --umi +Usage: umi-transfer external [OPTIONS] --in --in2 --umi -OPTIONS: - -c, --correct_numbers Read numbers will be altered to ensure the canonical read numbers 1 and 2 in output file sequence headers. +Options: + -c, --correct_numbers + Read numbers will be altered to ensure the canonical read numbers 1 and 2 in output file sequence headers. - -d, --delim Delimiter to use when joining the UMIs to the read name. Defaults to `:`. - -f, --force Overwrite existing output files without further warnings or prompts. + -z, --gzip + Compress output files. Turned off by default. - -h, --help Print help information - --in [REQUIRED] Input file 1 with reads. + -l, --compression_level + Choose the compression level: Maximum 9, defaults to 3. Higher numbers result in smaller files but take longer to compress. - --in2 [REQUIRED] Input file 2 with reads. + -t, --threads + Number of threads to use for processing. Defaults to the number of logical cores available. - --out Path to FastQ output file for R1. + -f, --force + Overwrite existing output files without further warnings or prompts. - --out2 Path to FastQ output file for R2. + -d, --delim + Delimiter to use when joining the UMIs to the read name. Defaults to `:`. - -u, --umi [REQUIRED] Input file with UMI. - -z, --gzip Compress output files. By default, turned off in favour of external compression. -``` + --in + [REQUIRED] Input file 1 with reads. -### Example -A run with just the mandatory arguments may look like this: + --in2 + [REQUIRED] Input file 2 with reads. -```shell -umi-transfer external -fz -d '_' --in 'R1.fastq' --in2 'R3.fastq' --umi 'R2.fastq' -``` -`umi-transfer` warrants paired input files. To run on singletons, use the same input twice and redirect one output to `/dev/null`: + -u, --umi + [REQUIRED] Input file with UMI. 
-```shell -umi-transfer external --in read1.fastq --in2 read1.fastq --umi read2.fastq --out output1.fastq --out2 /dev/null -``` -### High Performance Guide + --out + Path to FastQ output file for R1. -The performance bottleneck of UMI integration is output file compression. [Parallel Gzip](https://github.com/madler/pigz) can be used on modern multi-processor, multi-core machines to significantly outclass the single-threaded compression that ships with `umi-transfer`. -We recommend using Unix FIFOs (First In, First Out buffered pipes) to combine `umi-transfer` and `pigz` on GNU/Linux and MacOS operating systems: + --out2 + Path to FastQ output file for R2. -```shell -mkfifo read1.fastq -mkfifo read2.fastq -mkfifo read3.fastq + + -h, --help + Print help + -V, --version + Print version ``` -Assuming your compressed input files are called `read1.fastq.gz` and `read2.fastq.gz` and `read3.fastq.gz`, each can be linked to its respective FIFO like so: +### Example + +A typical run may look like this: ```shell -$ pigz -dc read1.fastq.gz > read1.fastq & -[1] 233387 -$ pigz -dc read2.fastq.gz > read2.fastq & -[2] 233388 -$ pigz -dc read3.fastq.gz > read3.fastq & -[3] 233389 +umi-transfer external -fz -d '_' --in 'R1.fastq' --in2 'R3.fastq' --umi 'R2.fastq' ``` -Note the trailing `&` to leave these processes running in the background. Since multi-threading is hardly helpful for decompression, you could also use `zcat` or `gzip -dc` instead of `pigz -dc` here. - -We can inspect the directory with `ls` to list the compressed files and the created FIFOs: +`umi-transfer` warrants paired input files. To run on singletons, use the same input twice and redirect one output to `/dev/null`: ```shell -$ ls -lh -total 1.5K --rw-rw----. 1 alneberg ngisweden 4.5G Apr 13 12:18 read1.fastq.gz --rw-rw----. 1 alneberg ngisweden 1.1G Apr 13 12:18 read2.fastq.gz --rw-rw----. 1 alneberg ngisweden 4.5G Apr 13 12:18 read3.fastq.gz -prw-rw-r--. 
1 alneberg ngisweden 0 Apr 13 12:46 read1.fastq -prw-rw-r--. 1 alneberg ngisweden 0 Apr 13 12:46 read2.fastq -prw-rw-r--. 1 alneberg ngisweden 0 Apr 13 12:46 read3.fastq +umi-transfer external --in read1.fastq --in2 read1.fastq --umi read2.fastq --out output1.fastq --out2 /dev/null ``` -We continue to create FIFOs for the output files: +### Chaining with other software + +`umi-transfer` cannot be used with the pipe operator, because it neither supports writing output to `stdout` nor reading input from `stdin`. However, FIFOs (_First In, First Out buffered pipes_) can be used to elegantly combine `umi-transfer` with other software on GNU/Linux and MacOS operating systems. + +For example, we may want to use external compression software like [Parallel Gzip](https://github.com/madler/pigz) together with `umi-transfer`. For this purpose, it would be unfavorable to write the data uncompressed to disk before compressing it. Instead, we create named pipes with `mkfifo`, which can be provided to `umi-transfer` as if they were regular output file paths. In reality, the data is directly passed on to `pigz` via a buffered stream. + +First, the named pipes are created: ```shell -mkfifo output1.fastq -mkfifo output2.fastq +mkfifo output1 +mkfifo output2 ``` -and set-up a multi-threaded `pigz` compression process each: +Then a multi-threaded `pigz` compression is tied to the FIFO. Note the trailing `&` to leave these processes running in the background. ```shell -$ pigz -p 10 -c > output1.fastq.gz < output1.fastq & +$ pigz -p 10 -c > output1.fastq.gz < output1 & [4] 233394 -$ pigz -p 10 -c > output2.fastq.gz < output2.fastq & +$ pigz -p 10 -c > output2.fastq.gz < output2 & [5] 233395 ``` The argument `-p 10` specifies the number of threads that each `pigz` processes may use. The optimal setting is hardware-specific and will require some testing. 
-Finally, we can then run `umi-transfer` using the FIFOs like so: +Finally, we can run `umi-transfer` using the FIFOs as output paths: ```shell -umi-transfer external --in read1.fastq --in2 read3.fastq --umi read2.fastq --out output1.fastq --out2 output2.fastq +umi-transfer external --in read1.fastq --in2 read3.fastq --umi read2.fastq --out output1 --out2 output2 ``` It's good practice to remove the FIFOs after the program has finished: ```shell -rm read1.fastq read2.fastq read3.fastq output1.fastq output2.fastq +rm output1 output2 ``` ## Contribution guide for developers diff --git a/src/umi_external.rs b/src/umi_external.rs index 45c028b..ea17c97 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -18,7 +18,7 @@ pub struct OptsExternal { #[clap( short = 'z', long = "gzip", - help = "Compress output files. By default, turned off in favour of external compression. + help = "Compress output files. Turned off by default. \n " )] gzip: bool, From 690e31f7e741db3964d87f59a4133130096137b6 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 22 May 2024 21:23:33 +0200 Subject: [PATCH 26/34] Code formatting.
--- .github/workflows/container.yml | 4 ++-- .github/workflows/testing.yml | 2 +- tests/auxiliary.rs | 5 +---- tests/integration_tests_external_filecontents.rs | 6 ++---- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index ead90ef..a233b27 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -27,13 +27,13 @@ jobs: echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - name: Log in to Docker Hub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Login to GitHub Container Registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.repository_owner }} diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 9c08743..04b3d46 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -88,7 +88,7 @@ jobs: continue-on-error: true - name: Create an artifact from clippy results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: ClippyResults path: rust-clippy-results.sarif diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs index 1a39d48..936e100 100644 --- a/tests/auxiliary.rs +++ b/tests/auxiliary.rs @@ -3,8 +3,8 @@ use assert_cmd::Command; use assert_fs::fixture::{NamedTempFile, TempDir}; use assert_fs::prelude::*; use predicates::prelude::*; -use std::path::PathBuf; use std::io::Read; +use std::path::PathBuf; // since those are just needed for the tests, I didn't put it in src. Therefore, using this module is not detected and dead_code warnings issued. #[derive()] @@ -134,11 +134,9 @@ pub fn verify_file_contents(test_file: &PathBuf, reference_file: &PathBuf) -> Re } } - // Function to compare two files, used to test if the program output matches the reference. 
#[allow(dead_code)] pub fn verify_file_binary(test_file: &PathBuf, reference_file: &PathBuf) -> Result { - let mut test_file_buf: Vec = Vec::new(); let mut reference_file_buf: Vec = Vec::new(); @@ -160,4 +158,3 @@ pub fn verify_file_binary(test_file: &PathBuf, reference_file: &PathBuf) -> Resu )) } } - diff --git a/tests/integration_tests_external_filecontents.rs b/tests/integration_tests_external_filecontents.rs index ef2d170..eb32ad1 100644 --- a/tests/integration_tests_external_filecontents.rs +++ b/tests/integration_tests_external_filecontents.rs @@ -1,5 +1,5 @@ use assert_fs::prelude::*; -use auxiliary::{verify_file_binary,verify_file_contents}; +use auxiliary::{verify_file_binary, verify_file_contents}; use predicates::prelude::*; use std::error::Error; @@ -59,8 +59,6 @@ fn testing_file_comparison_fails() { // Yep, verify_file_contents() does its job. Ready to rumble! - - #[test] fn external_produces_correct_output() -> TestResult { let (mut cmd, temp_dir, test_files, test_output) = auxiliary::setup_integration_test(true); @@ -330,4 +328,4 @@ fn external_produces_correct_compressed_output_thread_limit() -> TestResult { temp_dir.close()?; Ok(()) -} \ No newline at end of file +} From e058ec6440bf701af2dfd6f3874a635601e85843 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 22 May 2024 21:52:49 +0200 Subject: [PATCH 27/34] Bump version in main. 
--- src/main.rs | 4 ++-- src/umi_external.rs | 2 +- tests/auxiliary.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main.rs b/src/main.rs index d8e0598..8b09ced 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,8 +25,8 @@ https://github.com/SciLifeLab/umi-transfer #[derive(clap::Parser)] #[clap( - version = "1.5.0dev", - author = "Written by Judit Hohenthal, Matthias Zepper & Johannes Alneberg", + version = "1.5.0", + author = "Written by Matthias Zepper, Judit Hohenthal & Johannes Alneberg", about = "A tool for transferring Unique Molecular Identifiers (UMIs).", long_about = "Most tools capable of using UMIs to increase the accuracy of quantitative DNA sequencing experiments expect the respective UMI sequence to be embedded into the reads' IDs. You can use `umi-transfer external` to retrieve UMIs from a separate FastQ file and embed them to the IDs of your paired FastQ files." )] diff --git a/src/umi_external.rs b/src/umi_external.rs index ea17c97..21ca7f1 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -101,7 +101,7 @@ pub fn run(args: OptsExternal) -> Result { } // Set the number of threads to max, unless manually specified. In case of failure, use only 1. - let num_threads = args.num_threads.unwrap_or_else(|| threads_available()); + let num_threads = args.num_threads.unwrap_or_else(threads_available); // Determine the number of threads available for output file compression. 
let threads_per_task = threads_per_task(num_threads, 2); diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs index 936e100..13ab695 100644 --- a/tests/auxiliary.rs +++ b/tests/auxiliary.rs @@ -140,9 +140,9 @@ pub fn verify_file_binary(test_file: &PathBuf, reference_file: &PathBuf) -> Resu let mut test_file_buf: Vec = Vec::new(); let mut reference_file_buf: Vec = Vec::new(); - let mut test_file_handle = std::fs::File::open(&test_file) + let mut test_file_handle = std::fs::File::open(test_file) .map_err(|err| anyhow!("Failed to read test file: {}", err))?; - let mut reference_file_handle = std::fs::File::open(&reference_file) + let mut reference_file_handle = std::fs::File::open(reference_file) .map_err(|err| anyhow!("Failed to read reference file: {}", err))?; test_file_handle.read_to_end(&mut test_file_buf)?; From 2e575a79d980db3ec2f71ab587f9266d7ebd6073 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 23 May 2024 15:13:24 +0200 Subject: [PATCH 28/34] Bump Docker build action. --- .github/workflows/container.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index a233b27..cf3dc02 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -40,7 +40,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Push dev image - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v5 if: github.event_name == 'push' with: push: true @@ -49,7 +49,7 @@ jobs: ghcr.io/${{ env.REPO_LOWERCASE }}:dev - name: Push release image - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v5 if: github.event_name == 'release' with: push: true From d568f01dfc8ee06c6741aa38b17daaa75f279fae Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 24 May 2024 21:40:26 +0200 Subject: [PATCH 29/34] Update Debian disto in Docker image from bullseye to bookworm (stable). 
--- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index de1b22b..3f8a522 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM rust:latest as buildenv +FROM rust:bookworm as buildenv WORKDIR /usr/app/src COPY ./ /usr/app/src @@ -11,7 +11,7 @@ RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/rust/target \ cargo build --release -FROM debian:bullseye-slim as runner +FROM debian:bookworm-slim as runner WORKDIR /root COPY --from=buildenv /usr/app/src/target/release/ /usr/local/bin/ RUN chmod 755 /usr/local/bin/umi-transfer From caaaf2bb663d00181a8cd6a8b5c492855c25a90f Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 24 May 2024 21:49:30 +0200 Subject: [PATCH 30/34] Push dev container image also when workflow is manually dispatched. --- .github/workflows/container.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index cf3dc02..39062f7 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -41,7 +41,7 @@ jobs: - name: Push dev image uses: docker/build-push-action@v5 - if: github.event_name == 'push' + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' with: push: true tags: | From 152c1b4b568b584c33d2ea5986f41cca864bbfb8 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 27 May 2024 20:04:55 +0200 Subject: [PATCH 31/34] Added benchmark results to readme. 
--- README.md | 19 + docs/img/benchmark_umi-transfer-threads.svg | 616 ++++++++++++++ docs/img/benchmark_umi-transfer-version.svg | 877 ++++++++++++++++++++ 3 files changed, 1512 insertions(+) create mode 100644 docs/img/benchmark_umi-transfer-threads.svg create mode 100644 docs/img/benchmark_umi-transfer-version.svg diff --git a/README.md b/README.md index 728ddc1..2460b72 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ - [Background on Unique Molecular Identifiers](#background) - [Installing `umi-transfer`](#installation) - [Using `umi-transfer` to integrate UMIs](#usage) +- [Benchmarks and parameter recommendations](#benchmarks-and-parameter-recommendations) - [Chaining with other software](#chaining-with-other-software) - [Contributing bugfixes and new features](#contribution-guide-for-developers) @@ -176,6 +177,24 @@ umi-transfer external -fz -d '_' --in 'R1.fastq' --in2 'R3.fastq' --umi 'R2.fast umi-transfer external --in read1.fastq --in2 read1.fastq --umi read2.fastq --out output1.fastq --out2 /dev/null ``` +### Benchmarks and parameter recommendations + +A known shortcoming of version 1.0 of `umi-transfer` was the purely single-threaded output file compression, which significantly slowed down the tool. To mitigate this, we recommended using FIFOs and piping the uncompressed output to a dedicated compression tool like [`pigz`](https://github.com/madler/pigz). + +With the release of version 1.5, `umi-transfer` features internal multi-threaded output compression. As a result, `umi-transfer` 1.5 now runs approximately 25 times faster than version 1.0 when using internal compression and about twice as fast compared to using an external compression tool. This improvement is enabled by the outstanding [`gzp` crate](https://github.com/sstadick/gzp), which abstracts a lot of the underlying complexity away from the main software. 
+ +![Benchmark of different tool versions](docs/img/benchmark_umi-transfer-version.svg) + +In our first benchmark using 17 threads, version 1.5 of `umi-transfer` processed approximately 550,000 paired records per second with the default gzip compression level of 3. At the highest compression level of 9, the rate dropped to just below 200,000 records per second. While the exact numbers may vary depending on your storage, file system, and processors, we expect the relative performance rates to remain approximately constant. + +![Benchmark of thread numbers](docs/img/benchmark_umi-transfer-threads.svg) + +In a subsequent benchmark, we tested the effect of increasing the number of threads. For the default compression level, the maximum speed was achieved with 9 to 11 threads. Since umi-transfer writes two output files simultaneously, this configuration allows for 4 to 5 threads per file to handle the output compression. + +Adding more threads per file proved unhelpful, as other steps became the rate-limiting factors. These factors include file system I/O, input file decompression, and the actual editing of the file contents, which now determine the performance of umi-transfer. Only when increasing the compression level to higher settings did adding more threads continue to provide a performance benefit. For the highest compression setting, we did not reach the plateau phase during the benchmark, but it is likely to occur in the range of 53-55 total threads, or about 26 threads per output file. + +**In summary, we recommend running `umi-transfer` with 9 or 11 threads for compression. Odd numbers are favorable as they allow one dedicated main thread, while evenly splitting the remaining threads between the two output files. 
It's important to note that specifying more threads than the available physical or logical cores on your machine will result in a severe performance loss, since `umi-transfer` operates synchronously.** + ### Chaining with other software `umi-transfer` cannot be used with the pipe operator, because it neither supports writing output to `stdout` nor reading input from `stdin`. However, FIFOs (_First In, First Out buffered pipes_) can be used to elegantly combine `umi-transfer` with other software on GNU/Linux and MacOS operating systems. diff --git a/docs/img/benchmark_umi-transfer-threads.svg b/docs/img/benchmark_umi-transfer-threads.svg new file mode 100644 index 0000000..fef09e6 --- /dev/null +++ b/docs/img/benchmark_umi-transfer-threads.svg @@ -0,0 +1,616 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + diff --git a/docs/img/benchmark_umi-transfer-version.svg b/docs/img/benchmark_umi-transfer-version.svg new file mode 100644 index 0000000..82fe93d --- /dev/null +++ b/docs/img/benchmark_umi-transfer-version.svg @@ -0,0 +1,877 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 
bf57a713df2defb496492ccf5d5bfe4853f47b3a Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 28 May 2024 15:06:18 +0200 Subject: [PATCH 32/34] Update CLI help text for maximum thread number. --- src/umi_external.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/umi_external.rs b/src/umi_external.rs index 21ca7f1..fec0983 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -32,7 +32,7 @@ pub struct OptsExternal { #[clap( short = 't', long = "threads", - help = "Number of threads to use for processing. Defaults to the number of logical cores available. + help = "Maximum number of threads to use for processing. Preferably pick odd numbers, 9 or 11 recommended. Defaults to the maximum number of cores available. \n " )] num_threads: Option, From 35856612c66b23c79ccb95c31c71ee9a43d17544 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 3 Jun 2024 21:06:47 +0200 Subject: [PATCH 33/34] Small fixes in the Readme. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2460b72..9588e18 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ This tool efficiently integrates these separate UMIs into the headers and can al ### Binary Installation -Binaries for `umi-transfer` are available for most platforms and can be obtained from the [Releases page on GitHub](https://github.com/SciLifeLab/umi-transfer/releases). Simply navigate to the Releases page and download the appropriate binary of a release for your operating system. Once downloaded, you can place it in a directory of your choice and [optionally add the binary to your system's `$PATH`](https://astrobiomike.github.io/unix/modifying_your_path). +Binaries for `umi-transfer` are available for most platforms and can be obtained from the [_Releases_ page on GitHub](https://github.com/SciLifeLab/umi-transfer/releases). Simply navigate to the releases and download the appropriate binary for your operating system. 
Once downloaded, you can place it in a directory of your choice and [optionally add the binary to your system's `$PATH`](https://astrobiomike.github.io/unix/modifying_your_path). ### Bioconda @@ -85,7 +85,7 @@ alias umi-transfer="docker run -t -v `pwd`:`pwd` -w `pwd` mzscilifelab/umi-trans ### Compile from source -Given that you have [rust installed](https://www.rust-lang.org/tools/install) on your computer, download this repository and run +Given that you have [Rust installed](https://www.rust-lang.org/tools/install) on your computer, clone or download this repository and run ```shell cargo build --release From 4af3d9b65a4ec76c1eac556c4204f9a4137271b5 Mon Sep 17 00:00:00 2001 From: Matthias Zepper <6963520+MatthiasZepper@users.noreply.github.com> Date: Mon, 17 Jun 2024 18:03:37 +0200 Subject: [PATCH 34/34] Update README.md Co-authored-by: Johannes Alneberg --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 9588e18..06208b9 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,6 @@ umi-transfer external --in read1.fastq --in2 read1.fastq --umi read2.fastq --out ### Benchmarks and parameter recommendations -A known shortcoming of version 1.0 of `umi-transfer` was the purely single-threaded output file compression, which significantly slowed down the tool. To mitigate this, we recommended using FIFOs and piping the uncompressed output to a dedicated compression tool like [`pigz`](https://github.com/madler/pigz). With the release of version 1.5, `umi-transfer` features internal multi-threaded output compression. As a result, `umi-transfer` 1.5 now runs approximately 25 times faster than version 1.0 when using internal compression and about twice as fast compared to using an external compression tool. This improvement is enabled by the outstanding [`gzp` crate](https://github.com/sstadick/gzp), which abstracts a lot of the underlying complexity away from the main software.