diff --git a/.github/workflows/clippy.yml b/.github/workflows/clippy.yml new file mode 100644 index 0000000..03b0edd --- /dev/null +++ b/.github/workflows/clippy.yml @@ -0,0 +1,48 @@ +# Clippy is a tool that runs a bunch of tests on the Rust code to catch common mistakes and discouraged patterns. + +name: Analyze with Rust Clippy + +on: + push: + branches: + - main + - dev + pull_request: + branches: + - main + - dev + +jobs: + rust-clippy-analyze: + name: Run rust-clippy analyzing + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Install Rust toolchain + uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af #@v1 + with: + profile: minimal + toolchain: stable + components: clippy + override: true + + - name: Install required cargo + run: cargo install clippy-sarif sarif-fmt + + - name: Run rust-clippy + run: + cargo clippy + --all-features + --message-format=json | clippy-sarif | tee rust-clippy-results.sarif | sarif-fmt + continue-on-error: true + + - name: Upload analysis results to GitHub + uses: github/codeql-action/upload-sarif@v1 + with: + sarif_file: rust-clippy-results.sarif + wait-for-processing: true \ No newline at end of file diff --git a/.github/workflows/dockerhub.yml b/.github/workflows/dockerhub.yml new file mode 100644 index 0000000..064e2ce --- /dev/null +++ b/.github/workflows/dockerhub.yml @@ -0,0 +1,60 @@ +name: Publish to Dockerhub + +on: + push: + branches: + - main + - 'releases/**' + - AddCIpipelines + tags: + - 'v*' #if a push with a version tag like v0.0.2 is recorded + + release: + types: [published] + +jobs: + push_to_registry: + if: github.repository == 'SciLifeLab/umi-transfer' + name: Push Docker image to Docker Hub / GitHub Docker Registry + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v2 + + - name: Change repo name to lowercase + run: | + echo 
"REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV} + + - name: Log in to Docker Hub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Push dev image + uses: docker/build-push-action@v2 + if: github.event_name == 'push' + with: + push: true + tags: | + ${{ secrets.DOCKERHUB_USERNAME }}/umi-transfer:dev + ghcr.io/${{ env.REPO_LOWERCASE }}:dev + + - name: Push release image + uses: docker/build-push-action@v2 + if: github.event_name == 'release' + with: + push: true + tags: | + ${{ secrets.DOCKERHUB_USERNAME }}/umi-transfer:${{ github.event.release.tag_name }} + ${{ secrets.DOCKERHUB_USERNAME }}/umi-transfer:latest + ghcr.io/${{ env.REPO_LOWERCASE }}:${{ github.event.release.tag_name }} + ghcr.io/${{ env.REPO_LOWERCASE }}:${{ github.sha }} + ghcr.io/${{ env.REPO_LOWERCASE }}:latest diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..d79b62c --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,21 @@ +name: Test successful compilation on Rust + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + target: wasm32-unknown-unknown + - name: Build + uses: actions-rs/cargo@v1 + with: + command: build diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..703d3d0 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,898 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704" + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bio" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a69423e30444738eccc5e54eccee75779dd3f15ecc0469b95d8529d4b6b7586" +dependencies = [ + "anyhow", + "approx", + "bio-types", + "bit-set", + "bv", + "bytecount", + "csv", + "custom_derive", + "enum-map", + "fxhash", + "getset", + "itertools", + "itertools-num", + "lazy_static", + "multimap", + "ndarray", + "newtype_derive", + "num-integer", + "num-traits", + "ordered-float", + "petgraph", + "rand", + "regex", + "serde", + "serde_derive", + "statrs", + "strum", + "strum_macros 0.23.1", + "thiserror", + "triple_accel", + "vec_map", +] + +[[package]] +name = "bio-types" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfa990f40a28735fa598dc3dd58d73e62e6b41458959d623903b927ba7b04c80" 
+dependencies = [ + "derive-new", + "lazy_static", + "regex", + "strum_macros 0.24.2", + "thiserror", +] + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bv" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8834bb1d8ee5dc048ee3124f2c7c1afcc6bc9aed03f11e9dfd8c69470a5db340" +dependencies = [ + "feature-probe", + "serde", +] + +[[package]] +name = "bytecount" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "3.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"44bbe24bbd31a185bc2c4f7c2abe80bea13a20d57ee4e55be70ac512bdc76417" +dependencies = [ + "atty", + "bitflags", + "clap_derive", + "clap_lex", + "indexmap", + "once_cell", + "strsim", + "termcolor", + "textwrap", +] + +[[package]] +name = "clap_derive" +version = "3.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ba52acd3b0a5c33aeada5cdaa3267cdc7c594a98731d4268cdc1532f4264cb4" +dependencies = [ + "heck 0.4.0", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "custom_derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" + +[[package]] +name = "derive-new" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" + +[[package]] +name = "enum-map" +version = "1.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e893a7ba6116821058dec84a6fb14fb2a97cd8ce5fd0f85d5a4e760ecd7329d9" +dependencies = [ + "enum-map-derive", +] + +[[package]] +name = "enum-map-derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84278eae0af6e34ff6c1db44c11634a694aafac559ff3080e4db4e4ac35907aa" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "feature-probe" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "835a3dc7d1ec9e75e2b5fb4ba75396837112d2060b03f7d43bc1897c7f7211da" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getrandom" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getset" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" 
+dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "indexmap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "itertools" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +dependencies = [ + "either", +] + +[[package]] +name = "itertools-num" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a872a22f9e6f7521ca557660adb96dd830e54f0f490fa115bb55dd69d38b27e7" +dependencies = [ + "num-traits", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "libm" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33a33a362ce288760ec6a508b94caaec573ae7d3bbbd91b87aa0bad4456839db" + +[[package]] +name 
= "matrixmultiply" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "add85d4dd35074e6fedc608f8c8f513a3548619a9024b751949ef0e8e45a4d84" +dependencies = [ + "rawpointer", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "multimap" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +dependencies = [ + "serde", +] + +[[package]] +name = "nalgebra" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "462fffe4002f4f2e1f6a9dcf12cc1a6fc0e15989014efc02a941d3e0f5dc2120" +dependencies = [ + "approx", + "matrixmultiply", + "nalgebra-macros", + "num-complex", + "num-rational", + "num-traits", + "rand", + "rand_distr", + "simba", + "typenum", +] + +[[package]] +name = "nalgebra-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ndarray" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec23e6762830658d2b3d385a75aa212af2f67a4586d4442907144f3bb6a1ca8" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + +[[package]] +name = "newtype_derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac8cd24d9f185bb7223958d8c1ff7a961b74b1953fd05dba7cc568a63b3861ec" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "num-complex" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "once_cell" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" + +[[package]] +name = "ordered-float" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" +dependencies = [ + "num-traits", +] + +[[package]] +name = "os_str_bytes" +version = "6.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4" + +[[package]] +name = "paste" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" + +[[package]] +name = "petgraph" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "ppv-lite86" 
+version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c278e965f1d8cf32d6e0e96de3d3e79712178ae67986d9cf9151f51e95aac89b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + +[[package]] +name = 
"rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + +[[package]] +name = "rustc_version" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084" +dependencies = [ + "semver", +] + +[[package]] +name = "rustversion" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24c8ad4f0c00e1eb5bc7614d236a7f1300e3dbd76b68cac8e06fb00b015ad8d8" + +[[package]] +name = "ryu" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + +[[package]] +name = "semver" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" + +[[package]] +name = "serde" +version = "1.0.140" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc855a42c7967b7c369eb5860f7164ef1f6f81c20c7cc1141f2a604e18723b03" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f2122636b9fe3b81f1cb25099fcf2d3f542cdb1d45940d56c713158884a05da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "simba" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e82063457853d00243beda9952e910b82593e4b07ae9f721b9278a99a0d3d5c" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", +] + +[[package]] +name = "statrs" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05bdbb8e4e78216a85785a85d3ec3183144f98d0097b9281802c019bb07a6f05" +dependencies = [ + "approx", + "lazy_static", + "nalgebra", + "num-traits", + "rand", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "strum" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" + +[[package]] +name = "strum_macros" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" +dependencies = [ + "heck 0.3.3", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "strum_macros" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4faebde00e8ff94316c01800f9054fd2ba77d30d9e922541913051d1d978918b" +dependencies = [ + "heck 0.4.0", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = 
"syn" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" + +[[package]] +name = "thiserror" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "triple_accel" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22048bc95dfb2ffd05b1ff9a756290a009224b60b2f0e7525faeee7603851e63" + +[[package]] +name = "typenum" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" + +[[package]] +name = "umi-transfer" +version = "0.1.0" +dependencies = [ + "bio", + "clap", + "lazy_static", + "regex", +] + +[[package]] +name = "unicode-ident" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" + +[[package]] +name = "unicode-segmentation" +version = "1.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" +dependencies = [ + "serde", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..db354c6 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "umi-transfer" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "3.2.10", features = ["derive"] } +bio = "0.41.0" +regex = "1.6.0" +lazy_static = "1.4" \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..4d1e92f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM rust:latest as buildenv + +WORKDIR /usr/app/src +COPY ./ /usr/app/src + +RUN apt-get update && apt-get -y install clang cmake && \ + rm -rf /var/lib/apt/lists/* && \ + rustup component add rustfmt + +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/rust/target \ + cargo build --release + +FROM debian:bullseye-slim as runner +WORKDIR /root +COPY --from=buildenv /usr/app/src/target/release/ /usr/local/bin/ +RUN chmod +x /usr/local/bin/umi-transfer +CMD /usr/local/bin/umi-transfer diff --git a/README.md b/README.md new file mode 100644 index 0000000..66df55b --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# Building + +Go to the directory with the tool and type in `cargo build` . + +# Running + +### Usage + +The tool requires an input as follows: + +> `umi-transfer [OPTIONS] `
+ +`OPTIONS:` +| Flag | Required | Description | +| ------------- | :-----------: | ----------: | +| `-h`,`--help` | No | Print help information | +| `--prefix` | No (defaults to `integrated`) | Prefix for the names of the output files | +| `--r1-in` | Yes | FASTQ file with reads | +| `--r2-in` | No | FASTQ file with reads | + +`SUBCOMMANDS:` + +> `inline:` +> +> > | Flag | Required | Description | +> > | ------------ | :------------------------: | -------------------------: | +> > | `--pattern1` | Yes | Nucleotide pattern for the UMI | +> > | `--pattern2` | Required if `--r2-in` is given | Nucleotide pattern for the UMI | +> +> `separate:` +> +> > | Flag | Required | Description | +> > | --------- | :------: | ---------------------------: | +> > | `--ru-in` | Yes | FASTQ file containing the UMI records | + +The tool can be run with `cargo run --release -- [OPTIONS] --r1-in 'fastq' <SUBCOMMAND>`, where the `--release` flag is optional but ensures an optimized build.
+ +### Inline UMI extraction example: + +`cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R2.fastq' inline --pattern1 'NNNNNNNNN' --pattern2 'NNNNNNNNN'` + +### UMI in seperate file example: + +`cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R3.fastq' separate --ru-in 'R2.fastq'` diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..9c55991 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,192 @@ +use clap::Parser; +use std::iter::Iterator; +use std::thread; + +lazy_static::lazy_static! { +static ref UMI_PATTERN: regex::Regex = regex::Regex::new("^(N{2,})([ATCG]*)$").unwrap(); +} +struct Nucleotide { + offset: usize, + spacer: String, +} +enum ExtractedRecord { + Empty, + Valid { + read: bio::io::fastq::Record, + umi: Vec, + }, +} +fn read_fastq( + path: &std::string::String, +) -> bio::io::fastq::Reader> { + std::fs::File::open(path) + .map(bio::io::fastq::Reader::new) + .unwrap() +} +fn output_file(name: &str) -> bio::io::fastq::Writer { + std::fs::File::create(format!("{}.fastq", name)) + .map(bio::io::fastq::Writer::new) + .unwrap() +} + +#[derive(clap::Parser)] +struct Opts { + #[clap(long, default_value = "integrated")] + prefix: String, + #[clap(long, required = true)] + r1_in: Vec, + #[clap(long)] + r2_in: Vec, + #[clap(subcommand)] + sub: Commands, +} + +#[derive(clap::Subcommand)] +enum Commands { + #[clap(name = "separate")] + Separate { + #[clap(long, required = true)] + ru_in: Vec, + }, + #[clap(name = "inline")] + Inline { + #[clap(long, required = true)] + pattern1: String, + #[clap(long)] + pattern2: Option, + }, +} + +fn write_to_file( + input: bio::io::fastq::Record, + mut output: bio::io::fastq::Writer, + umi: &[u8], + second: bool, +) -> bio::io::fastq::Writer { + let s = input; + if second { + let header = &[s.id(), ":", std::str::from_utf8(&umi).unwrap()].concat(); + let mut string = String::from(s.desc().unwrap()); + string.replace_range(0..1, "2"); + let desc: Option<&str> = 
Some(&string); + output.write(&header, desc, s.seq(), s.qual()).unwrap(); + } else { + let header = &[s.id(), ":", std::str::from_utf8(&umi).unwrap()].concat(); + output.write(&header, s.desc(), s.seq(), s.qual()).unwrap(); + } + output +} +fn parse(pattern: &str) -> Option { + if let Some(captures) = UMI_PATTERN.captures(pattern) { + Some(Nucleotide { + offset: captures.get(1)?.end(), + spacer: captures.get(2)?.as_str().into(), + }) + } else { + panic!("") + } +} +fn extract(record: bio::io::fastq::Record, pattern: &str) -> ExtractedRecord { + let handler = parse(pattern); + match handler { + Some(Nucleotide { offset, spacer }) => { + let end = offset + spacer.len(); + if end <= record.seq().len() && record.seq()[offset..end] == *spacer.as_bytes() { + let read = bio::io::fastq::Record::with_attrs( + record.id(), + record.desc(), + record.seq()[end..record.seq().len()].into(), + record.qual()[end..record.qual().len()].into(), + ); + ExtractedRecord::Valid { + read: read, + umi: record.seq()[0..offset].into(), + } + } else { + ExtractedRecord::Empty + } + } + None => panic!(""), + } +} +fn write_inline_to_file( + record: ExtractedRecord, + write_file: bio::io::fastq::Writer, + second: bool, +) -> bio::io::fastq::Writer { + match record { + ExtractedRecord::Empty => panic!("Not Valid UMI/ Record"), + ExtractedRecord::Valid { read, umi } => write_to_file(read, write_file, &umi, second), + } +} +fn main() { + let args = Opts::parse(); + + // Create write files + let mut write_file_r1 = output_file(&format!("{}1", &args.prefix)); + + // read supplied files + let r1 = read_fastq(&args.r1_in[0]).records(); + match args.sub { + Commands::Separate { ru_in } => { + let ru1 = ru_in.clone(); + let handle1 = thread::spawn(move || { + let ru = read_fastq(&ru_in[0]).records(); + for (r1_rec, ru_rec) in r1.zip(ru) { + write_file_r1 = + write_to_file(r1_rec.unwrap(), write_file_r1, ru_rec.unwrap().seq(), false); + } + }); + let mut l = Vec::new(); + l.push(handle1); + if 
!&args.r2_in.is_empty() { + let r2 = read_fastq(&args.r2_in[0]).records(); + let mut write_file_r2 = output_file(&format!("{}2", &args.prefix)); + let handle2 = thread::spawn(move || { + let ru = read_fastq(&ru1[0]).records(); + for (r2_rec, ru_rec) in r2.zip(ru) { + write_file_r2 = write_to_file( + r2_rec.unwrap(), + write_file_r2, + ru_rec.unwrap().seq(), + true, + ); + } + }); + l.push(handle2); + } + for i in l { + if !i.is_finished() { + i.join().unwrap(); + } + } + } + Commands::Inline { pattern1, pattern2 } => { + let handle1 = thread::spawn(move || { + for r1_rec in r1 { + let record1 = extract(r1_rec.unwrap(), &pattern1); + write_file_r1 = write_inline_to_file(record1, write_file_r1, false); + } + }); + let mut l = Vec::new(); + l.push(handle1); + + if !&args.r2_in.is_empty() { + let mut write_file_r2 = output_file(&format!("{}2", &args.prefix)); + let r2 = read_fastq(&args.r2_in[0]).records(); + let handle2 = thread::spawn(move || { + for r2_rec in r2 { + let record2 = extract(r2_rec.unwrap(), &(pattern2.as_ref().unwrap())); + write_file_r2 = write_inline_to_file(record2, write_file_r2, true); + } + }); + l.push(handle2); + } + for i in l { + if !i.is_finished() { + i.join().unwrap(); + } + } + } + } +}