From 72906f726c1a6b6f6b114a14728cb503f63a985a Mon Sep 17 00:00:00 2001 From: Karolina Zrobek Date: Sun, 16 Jun 2024 18:26:52 +0200 Subject: [PATCH] Repo struct change and fix undefined names --- .github/workflows/python-package.yml | 4 +- .../models/CLAP/__init__.py => LICENSE | 0 .../training/__init__.py => pyproject.toml | 0 .../AudioSep/AudioSep_Colab.ipynb | 0 src/{ => v_audio_cc}/AudioSep/CONTRIBUTING.md | 0 src/{ => v_audio_cc}/AudioSep/LICENSE | 0 src/{ => v_audio_cc}/AudioSep/README.md | 0 .../AudioSep/assets/results.png | Bin src/{ => v_audio_cc}/AudioSep/benchmark.py | 0 .../AudioSep/callbacks/base.py | 0 src/{ => v_audio_cc}/AudioSep/cog.yaml | 0 .../AudioSep/config/audiosep_base.yaml | 0 .../AudioSep/data/audiotext_dataset.py | 0 .../AudioSep/data/datamodules.py | 0 .../AudioSep/data/waveform_mixers.py | 0 .../AudioSep/datafiles/template.json | 0 src/{ => v_audio_cc}/AudioSep/environment.yml | 0 .../AudioSep/evaluation/evaluate_audiocaps.py | 0 .../AudioSep/evaluation/evaluate_audioset.py | 0 .../AudioSep/evaluation/evaluate_clotho.py | 0 .../AudioSep/evaluation/evaluate_esc50.py | 0 .../AudioSep/evaluation/evaluate_music.py | 0 .../AudioSep/evaluation/evaluate_vggsound.py | 0 .../evaluation/metadata/audiocaps_eval.csv | 0 .../evaluation/metadata/audioset_eval.csv | 0 .../metadata/class_labels_indices.csv | 0 .../evaluation/metadata/clotho_eval.csv | 0 .../evaluation/metadata/esc50_eval.csv | 0 .../evaluation/metadata/music_eval.csv | 0 .../evaluation/metadata/vggsound_eval.csv | 0 src/{ => v_audio_cc}/AudioSep/losses.py | 0 .../AudioSep/models/CLAP/__init__.py | 0 .../models/CLAP/open_clip/__init__.py | 0 .../AudioSep/models/CLAP/open_clip/bert.py | 0 .../open_clip/bpe_simple_vocab_16e6.txt.gz | Bin .../AudioSep/models/CLAP/open_clip/factory.py | 0 .../models/CLAP/open_clip/feature_fusion.py | 0 .../AudioSep/models/CLAP/open_clip/htsat.py | 0 .../models/CLAP/open_clip/linear_probe.py | 0 .../AudioSep/models/CLAP/open_clip/loss.py | 0 .../AudioSep/models/CLAP/open_clip/model.py | 114 +++++++++--------- .../open_clip/model_configs/HTSAT-base.json | 0 .../open_clip/model_configs/HTSAT-large.json | 0 .../model_configs/HTSAT-tiny-win-1536.json | 0 .../open_clip/model_configs/HTSAT-tiny.json | 0 .../CLAP/open_clip/model_configs/PANN-10.json | 0 .../model_configs/PANN-14-fmax-18k.json | 0 .../model_configs/PANN-14-fmax-8k-20s.json | 0 .../PANN-14-tiny-transformer.json | 0 .../model_configs/PANN-14-win-1536.json | 0 .../CLAP/open_clip/model_configs/PANN-14.json | 0 .../CLAP/open_clip/model_configs/PANN-6.json | 0 .../model_configs/RN101-quickgelu.json | 0 .../CLAP/open_clip/model_configs/RN101.json | 0 .../model_configs/RN50-quickgelu.json | 0 .../CLAP/open_clip/model_configs/RN50.json | 0 .../CLAP/open_clip/model_configs/RN50x16.json | 0 .../CLAP/open_clip/model_configs/RN50x4.json | 0 .../open_clip/model_configs/ViT-B-16.json | 0 .../model_configs/ViT-B-32-quickgelu.json | 0 .../open_clip/model_configs/ViT-B-32.json | 0 .../open_clip/model_configs/ViT-L-14.json | 0 .../AudioSep/models/CLAP/open_clip/openai.py | 0 .../models/CLAP/open_clip/pann_model.py | 0 .../models/CLAP/open_clip/pretrained.py | 0 .../models/CLAP/open_clip/timm_model.py | 0 .../models/CLAP/open_clip/tokenizer.py | 0 .../models/CLAP/open_clip/transform.py | 0 .../AudioSep/models/CLAP/open_clip/utils.py | 0 .../AudioSep/models/CLAP/open_clip/version.py | 0 .../AudioSep/models/CLAP/training/__init__.py | 0 .../models/CLAP/training/audioset_textmap.npy | Bin .../AudioSep/models/CLAP/training/data.py | 0 .../models/CLAP/training/distributed.py | 0 .../CLAP/training/imagenet_zeroshot_data.py | 0 .../models/CLAP/training/infer_demo.py | 0 .../AudioSep/models/CLAP/training/logger.py | 0 .../AudioSep/models/CLAP/training/lp_main.py | 0 .../AudioSep/models/CLAP/training/lp_train.py | 0 .../AudioSep/models/CLAP/training/main.py | 0 .../AudioSep/models/CLAP/training/params.py | 0 .../models/CLAP/training/scheduler.py | 0 .../AudioSep/models/CLAP/training/train.py | 0 .../models/CLAP/training/zero_shot.py | 0 .../AudioSep/models/audiosep.py | 0 src/{ => v_audio_cc}/AudioSep/models/base.py | 0 .../AudioSep/models/clap_encoder.py | 0 .../AudioSep/models/resunet.py | 0 .../AudioSep/optimizers/lr_schedulers.py | 0 src/{ => v_audio_cc}/AudioSep/pipeline.py | 0 src/{ => v_audio_cc}/AudioSep/predict.py | 0 src/{ => v_audio_cc}/AudioSep/train.py | 0 src/{ => v_audio_cc}/AudioSep/utils.py | 56 ++++----- src/v_audio_cc/__init__.py | 0 src/{ => v_audio_cc}/audio_classifier.py | 0 .../audio_source_separator.py | 0 src/{ => v_audio_cc}/data_processing.py | 0 src/{ => v_audio_cc}/generate_subtitles.py | 0 src/{ => v_audio_cc}/main.py | 0 src/{ => v_audio_cc}/pipeline.py | 0 100 files changed, 86 insertions(+), 88 deletions(-) rename src/AudioSep/models/CLAP/__init__.py => LICENSE (100%) rename src/AudioSep/models/CLAP/training/__init__.py => pyproject.toml (100%) rename src/{ => v_audio_cc}/AudioSep/AudioSep_Colab.ipynb (100%) rename src/{ => v_audio_cc}/AudioSep/CONTRIBUTING.md (100%) rename src/{ => v_audio_cc}/AudioSep/LICENSE (100%) rename src/{ => v_audio_cc}/AudioSep/README.md (100%) rename src/{ => v_audio_cc}/AudioSep/assets/results.png (100%) rename src/{ => v_audio_cc}/AudioSep/benchmark.py (100%) rename src/{ => v_audio_cc}/AudioSep/callbacks/base.py (100%) rename src/{ => v_audio_cc}/AudioSep/cog.yaml (100%) rename src/{ => v_audio_cc}/AudioSep/config/audiosep_base.yaml (100%) rename src/{ => v_audio_cc}/AudioSep/data/audiotext_dataset.py (100%) rename src/{ => v_audio_cc}/AudioSep/data/datamodules.py (100%) rename src/{ => v_audio_cc}/AudioSep/data/waveform_mixers.py (100%) rename src/{ => v_audio_cc}/AudioSep/datafiles/template.json (100%) rename src/{ => v_audio_cc}/AudioSep/environment.yml (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/evaluate_audiocaps.py (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/evaluate_audioset.py (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/evaluate_clotho.py (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/evaluate_esc50.py (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/evaluate_music.py (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/evaluate_vggsound.py (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/metadata/audiocaps_eval.csv (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/metadata/audioset_eval.csv (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/metadata/class_labels_indices.csv (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/metadata/clotho_eval.csv (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/metadata/esc50_eval.csv (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/metadata/music_eval.csv (100%) rename src/{ => v_audio_cc}/AudioSep/evaluation/metadata/vggsound_eval.csv (100%) rename src/{ => v_audio_cc}/AudioSep/losses.py (100%) create mode 100644 src/v_audio_cc/AudioSep/models/CLAP/__init__.py rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/__init__.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/bert.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/bpe_simple_vocab_16e6.txt.gz (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/factory.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/feature_fusion.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/htsat.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/linear_probe.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/loss.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model.py (93%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-base.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-large.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny-win-1536.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/PANN-10.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-18k.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-8k-20s.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-tiny-transformer.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-win-1536.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/PANN-14.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/PANN-6.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/RN101-quickgelu.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/RN101.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/RN50-quickgelu.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/RN50.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/RN50x16.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/RN50x4.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-16.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32-quickgelu.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/model_configs/ViT-L-14.json (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/openai.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/pann_model.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/pretrained.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/timm_model.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/tokenizer.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/transform.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/utils.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/open_clip/version.py (100%) create mode 100644 src/v_audio_cc/AudioSep/models/CLAP/training/__init__.py rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/audioset_textmap.npy (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/data.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/distributed.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/imagenet_zeroshot_data.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/infer_demo.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/logger.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/lp_main.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/lp_train.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/main.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/params.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/scheduler.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/train.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/CLAP/training/zero_shot.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/audiosep.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/base.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/clap_encoder.py (100%) rename src/{ => v_audio_cc}/AudioSep/models/resunet.py (100%) rename src/{ => v_audio_cc}/AudioSep/optimizers/lr_schedulers.py (100%) rename src/{ => v_audio_cc}/AudioSep/pipeline.py (100%) rename src/{ => v_audio_cc}/AudioSep/predict.py (100%) rename src/{ => v_audio_cc}/AudioSep/train.py (100%) rename src/{ => v_audio_cc}/AudioSep/utils.py (90%) create mode 100644 src/v_audio_cc/__init__.py rename src/{ => v_audio_cc}/audio_classifier.py (100%) rename src/{ => v_audio_cc}/audio_source_separator.py (100%) rename src/{ => v_audio_cc}/data_processing.py (100%) rename src/{ => v_audio_cc}/generate_subtitles.py (100%) rename src/{ => v_audio_cc}/main.py (100%) rename src/{ => v_audio_cc}/pipeline.py (100%) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index e56abb6..60c80b3 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -35,6 +35,4 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest + diff --git a/src/AudioSep/models/CLAP/__init__.py b/LICENSE similarity index 100% rename from src/AudioSep/models/CLAP/__init__.py rename to LICENSE diff --git a/src/AudioSep/models/CLAP/training/__init__.py b/pyproject.toml similarity index 100% rename from src/AudioSep/models/CLAP/training/__init__.py rename to pyproject.toml diff --git a/src/AudioSep/AudioSep_Colab.ipynb b/src/v_audio_cc/AudioSep/AudioSep_Colab.ipynb similarity index 100% rename from src/AudioSep/AudioSep_Colab.ipynb rename to src/v_audio_cc/AudioSep/AudioSep_Colab.ipynb diff --git a/src/AudioSep/CONTRIBUTING.md b/src/v_audio_cc/AudioSep/CONTRIBUTING.md similarity index 100% rename from src/AudioSep/CONTRIBUTING.md rename to src/v_audio_cc/AudioSep/CONTRIBUTING.md diff --git a/src/AudioSep/LICENSE b/src/v_audio_cc/AudioSep/LICENSE similarity index 100% rename from src/AudioSep/LICENSE rename to src/v_audio_cc/AudioSep/LICENSE diff --git a/src/AudioSep/README.md b/src/v_audio_cc/AudioSep/README.md similarity index 100% rename from src/AudioSep/README.md rename to src/v_audio_cc/AudioSep/README.md diff --git a/src/AudioSep/assets/results.png b/src/v_audio_cc/AudioSep/assets/results.png similarity index 100% rename from src/AudioSep/assets/results.png rename to src/v_audio_cc/AudioSep/assets/results.png diff --git a/src/AudioSep/benchmark.py b/src/v_audio_cc/AudioSep/benchmark.py similarity index 100% rename from src/AudioSep/benchmark.py rename to src/v_audio_cc/AudioSep/benchmark.py diff --git a/src/AudioSep/callbacks/base.py b/src/v_audio_cc/AudioSep/callbacks/base.py similarity index 100% rename from src/AudioSep/callbacks/base.py rename to src/v_audio_cc/AudioSep/callbacks/base.py diff --git a/src/AudioSep/cog.yaml b/src/v_audio_cc/AudioSep/cog.yaml similarity index 100% rename from src/AudioSep/cog.yaml rename to src/v_audio_cc/AudioSep/cog.yaml diff --git a/src/AudioSep/config/audiosep_base.yaml b/src/v_audio_cc/AudioSep/config/audiosep_base.yaml similarity index 100% rename from src/AudioSep/config/audiosep_base.yaml rename to src/v_audio_cc/AudioSep/config/audiosep_base.yaml diff --git a/src/AudioSep/data/audiotext_dataset.py b/src/v_audio_cc/AudioSep/data/audiotext_dataset.py similarity index 100% rename from src/AudioSep/data/audiotext_dataset.py rename to src/v_audio_cc/AudioSep/data/audiotext_dataset.py diff --git a/src/AudioSep/data/datamodules.py b/src/v_audio_cc/AudioSep/data/datamodules.py similarity index 100% rename from src/AudioSep/data/datamodules.py rename to src/v_audio_cc/AudioSep/data/datamodules.py diff --git a/src/AudioSep/data/waveform_mixers.py b/src/v_audio_cc/AudioSep/data/waveform_mixers.py similarity index 100% rename from src/AudioSep/data/waveform_mixers.py rename to src/v_audio_cc/AudioSep/data/waveform_mixers.py diff --git a/src/AudioSep/datafiles/template.json b/src/v_audio_cc/AudioSep/datafiles/template.json similarity index 100% rename from src/AudioSep/datafiles/template.json rename to src/v_audio_cc/AudioSep/datafiles/template.json diff --git a/src/AudioSep/environment.yml b/src/v_audio_cc/AudioSep/environment.yml similarity index 100% rename from src/AudioSep/environment.yml rename to src/v_audio_cc/AudioSep/environment.yml diff --git a/src/AudioSep/evaluation/evaluate_audiocaps.py b/src/v_audio_cc/AudioSep/evaluation/evaluate_audiocaps.py similarity index 100% rename from src/AudioSep/evaluation/evaluate_audiocaps.py rename to src/v_audio_cc/AudioSep/evaluation/evaluate_audiocaps.py diff --git a/src/AudioSep/evaluation/evaluate_audioset.py b/src/v_audio_cc/AudioSep/evaluation/evaluate_audioset.py similarity index 100% rename from src/AudioSep/evaluation/evaluate_audioset.py rename to src/v_audio_cc/AudioSep/evaluation/evaluate_audioset.py diff --git a/src/AudioSep/evaluation/evaluate_clotho.py b/src/v_audio_cc/AudioSep/evaluation/evaluate_clotho.py similarity index 100% rename from src/AudioSep/evaluation/evaluate_clotho.py rename to src/v_audio_cc/AudioSep/evaluation/evaluate_clotho.py diff --git a/src/AudioSep/evaluation/evaluate_esc50.py b/src/v_audio_cc/AudioSep/evaluation/evaluate_esc50.py similarity index 100% rename from src/AudioSep/evaluation/evaluate_esc50.py rename to src/v_audio_cc/AudioSep/evaluation/evaluate_esc50.py diff --git a/src/AudioSep/evaluation/evaluate_music.py b/src/v_audio_cc/AudioSep/evaluation/evaluate_music.py similarity index 100% rename from src/AudioSep/evaluation/evaluate_music.py rename to src/v_audio_cc/AudioSep/evaluation/evaluate_music.py diff --git a/src/AudioSep/evaluation/evaluate_vggsound.py b/src/v_audio_cc/AudioSep/evaluation/evaluate_vggsound.py similarity index 100% rename from src/AudioSep/evaluation/evaluate_vggsound.py rename to src/v_audio_cc/AudioSep/evaluation/evaluate_vggsound.py diff --git a/src/AudioSep/evaluation/metadata/audiocaps_eval.csv b/src/v_audio_cc/AudioSep/evaluation/metadata/audiocaps_eval.csv similarity index 100% rename from src/AudioSep/evaluation/metadata/audiocaps_eval.csv rename to src/v_audio_cc/AudioSep/evaluation/metadata/audiocaps_eval.csv diff --git a/src/AudioSep/evaluation/metadata/audioset_eval.csv b/src/v_audio_cc/AudioSep/evaluation/metadata/audioset_eval.csv similarity index 100% rename from src/AudioSep/evaluation/metadata/audioset_eval.csv rename to src/v_audio_cc/AudioSep/evaluation/metadata/audioset_eval.csv diff --git a/src/AudioSep/evaluation/metadata/class_labels_indices.csv b/src/v_audio_cc/AudioSep/evaluation/metadata/class_labels_indices.csv similarity index 100% rename from src/AudioSep/evaluation/metadata/class_labels_indices.csv rename to src/v_audio_cc/AudioSep/evaluation/metadata/class_labels_indices.csv diff --git a/src/AudioSep/evaluation/metadata/clotho_eval.csv b/src/v_audio_cc/AudioSep/evaluation/metadata/clotho_eval.csv similarity index 100% rename from src/AudioSep/evaluation/metadata/clotho_eval.csv rename to src/v_audio_cc/AudioSep/evaluation/metadata/clotho_eval.csv diff --git a/src/AudioSep/evaluation/metadata/esc50_eval.csv b/src/v_audio_cc/AudioSep/evaluation/metadata/esc50_eval.csv similarity index 100% rename from src/AudioSep/evaluation/metadata/esc50_eval.csv rename to src/v_audio_cc/AudioSep/evaluation/metadata/esc50_eval.csv diff --git a/src/AudioSep/evaluation/metadata/music_eval.csv b/src/v_audio_cc/AudioSep/evaluation/metadata/music_eval.csv similarity index 100% rename from src/AudioSep/evaluation/metadata/music_eval.csv rename to src/v_audio_cc/AudioSep/evaluation/metadata/music_eval.csv diff --git a/src/AudioSep/evaluation/metadata/vggsound_eval.csv b/src/v_audio_cc/AudioSep/evaluation/metadata/vggsound_eval.csv similarity index 100% rename from src/AudioSep/evaluation/metadata/vggsound_eval.csv rename to src/v_audio_cc/AudioSep/evaluation/metadata/vggsound_eval.csv diff --git a/src/AudioSep/losses.py b/src/v_audio_cc/AudioSep/losses.py similarity index 100% rename from src/AudioSep/losses.py rename to src/v_audio_cc/AudioSep/losses.py diff --git a/src/v_audio_cc/AudioSep/models/CLAP/__init__.py b/src/v_audio_cc/AudioSep/models/CLAP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/AudioSep/models/CLAP/open_clip/__init__.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/__init__.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/__init__.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/__init__.py diff --git a/src/AudioSep/models/CLAP/open_clip/bert.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/bert.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/bert.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/bert.py diff --git a/src/AudioSep/models/CLAP/open_clip/bpe_simple_vocab_16e6.txt.gz b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/bpe_simple_vocab_16e6.txt.gz similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/bpe_simple_vocab_16e6.txt.gz rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/bpe_simple_vocab_16e6.txt.gz diff --git a/src/AudioSep/models/CLAP/open_clip/factory.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/factory.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/factory.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/factory.py diff --git a/src/AudioSep/models/CLAP/open_clip/feature_fusion.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/feature_fusion.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/feature_fusion.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/feature_fusion.py diff --git a/src/AudioSep/models/CLAP/open_clip/htsat.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/htsat.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/htsat.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/htsat.py diff --git a/src/AudioSep/models/CLAP/open_clip/linear_probe.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/linear_probe.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/linear_probe.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/linear_probe.py diff --git a/src/AudioSep/models/CLAP/open_clip/loss.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/loss.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/loss.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/loss.py diff --git a/src/AudioSep/models/CLAP/open_clip/model.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model.py similarity index 93% rename from src/AudioSep/models/CLAP/open_clip/model.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model.py index 5677da7..df8e1c5 100644 --- a/src/AudioSep/models/CLAP/open_clip/model.py +++ b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model.py @@ -780,63 +780,63 @@ def get_audio_embedding(self, data): return audio_embeds - def audio_infer(self, audio, hopsize=None, device=None): - """Forward one audio and produce the audio embedding - - Parameters - ---------- - audio: (audio_length) - the time-domain audio input, notice that it must be only one input - hopsize: int - the overlap hopsize as the sliding window - - Returns - ---------- - output_dict: { - key: [n, (embedding_shape)] if "HTS-AT" - or - key: [(embedding_shape)] if "PANN" - } - the list of key values of the audio branch - - """ - - assert not self.training, "the inference mode must be run at eval stage" - output_dict = {} - # PANN - if self.audio_cfg.model_type == "PANN": - audio_input = audio.unsqueeze(dim=0) - output_dict[key] = self.encode_audio(audio_input, device=device)[ - key - ].squeeze(dim=0) - elif self.audio_cfg.model_type == "HTSAT": - # repeat - audio_len = len(audio) - k = self.audio_cfg.clip_samples // audio_len - if k > 1: - audio = audio.repeat(k) - audio_len = len(audio) - - if hopsize is None: - hopsize = min(hopsize, audio_len) - - if audio_len > self.audio_cfg.clip_samples: - audio_input = [ - audio[pos : pos + self.audio_cfg.clip_samples].clone() - for pos in range( - 0, audio_len - self.audio_cfg.clip_samples, hopsize - ) - ] - audio_input.append(audio[-self.audio_cfg.clip_samples :].clone()) - audio_input = torch.stack(audio_input) - output_dict[key] = self.encode_audio(audio_input, device=device)[key] - else: - audio_input = audio.unsqueeze(dim=0) - output_dict[key] = self.encode_audio(audio_input, device=device)[ - key - ].squeeze(dim=0) - - return output_dict + # def audio_infer(self, audio, hopsize=None, device=None): + # """Forward one audio and produce the audio embedding + + # Parameters + # ---------- + # audio: (audio_length) + # the time-domain audio input, notice that it must be only one input + # hopsize: int + # the overlap hopsize as the sliding window + + # Returns + # ---------- + # output_dict: { + # key: [n, (embedding_shape)] if "HTS-AT" + # or + # key: [(embedding_shape)] if "PANN" + # } + # the list of key values of the audio branch + + # """ + + # assert not self.training, "the inference mode must be run at eval stage" + # output_dict = {} + # # PANN + # if self.audio_cfg.model_type == "PANN": + # audio_input = audio.unsqueeze(dim=0) + # output_dict[key] = self.encode_audio(audio_input, device=device)[ + # key + # ].squeeze(dim=0) + # elif self.audio_cfg.model_type == "HTSAT": + # # repeat + # audio_len = len(audio) + # k = self.audio_cfg.clip_samples // audio_len + # if k > 1: + # audio = audio.repeat(k) + # audio_len = len(audio) + + # if hopsize is None: + # hopsize = min(hopsize, audio_len) + + # if audio_len > self.audio_cfg.clip_samples: + # audio_input = [ + # audio[pos : pos + self.audio_cfg.clip_samples].clone() + # for pos in range( + # 0, audio_len - self.audio_cfg.clip_samples, hopsize + # ) + # ] + # audio_input.append(audio[-self.audio_cfg.clip_samples :].clone()) + # audio_input = torch.stack(audio_input) + # output_dict[key] = self.encode_audio(audio_input, device=device)[key] + # else: + # audio_input = audio.unsqueeze(dim=0) + # output_dict[key] = self.encode_audio(audio_input, device=device)[ + # key + # ].squeeze(dim=0) + + # return output_dict def convert_weights_to_fp16(model: nn.Module): diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-base.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-base.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-base.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-base.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-large.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-large.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-large.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-large.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny-win-1536.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny-win-1536.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny-win-1536.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny-win-1536.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/HTSAT-tiny.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/PANN-10.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-10.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/PANN-10.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-10.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-18k.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-18k.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-18k.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-18k.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-8k-20s.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-8k-20s.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-8k-20s.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-fmax-8k-20s.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-tiny-transformer.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-tiny-transformer.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-tiny-transformer.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-tiny-transformer.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-win-1536.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-win-1536.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-win-1536.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14-win-1536.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/PANN-14.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-14.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/PANN-6.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-6.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/PANN-6.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/PANN-6.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/RN101-quickgelu.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN101-quickgelu.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/RN101-quickgelu.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN101-quickgelu.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/RN101.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN101.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/RN101.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN101.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/RN50-quickgelu.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN50-quickgelu.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/RN50-quickgelu.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN50-quickgelu.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/RN50.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN50.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/RN50.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN50.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/RN50x16.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN50x16.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/RN50x16.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN50x16.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/RN50x4.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN50x4.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/RN50x4.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/RN50x4.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-16.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-16.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-16.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-16.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32-quickgelu.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32-quickgelu.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32-quickgelu.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32-quickgelu.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/ViT-B-32.json diff --git a/src/AudioSep/models/CLAP/open_clip/model_configs/ViT-L-14.json b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/ViT-L-14.json similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/model_configs/ViT-L-14.json rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/model_configs/ViT-L-14.json diff --git a/src/AudioSep/models/CLAP/open_clip/openai.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/openai.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/openai.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/openai.py diff --git a/src/AudioSep/models/CLAP/open_clip/pann_model.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/pann_model.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/pann_model.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/pann_model.py diff --git a/src/AudioSep/models/CLAP/open_clip/pretrained.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/pretrained.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/pretrained.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/pretrained.py diff --git a/src/AudioSep/models/CLAP/open_clip/timm_model.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/timm_model.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/timm_model.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/timm_model.py diff --git a/src/AudioSep/models/CLAP/open_clip/tokenizer.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/tokenizer.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/tokenizer.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/tokenizer.py diff --git a/src/AudioSep/models/CLAP/open_clip/transform.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/transform.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/transform.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/transform.py diff --git a/src/AudioSep/models/CLAP/open_clip/utils.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/utils.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/utils.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/utils.py diff --git a/src/AudioSep/models/CLAP/open_clip/version.py b/src/v_audio_cc/AudioSep/models/CLAP/open_clip/version.py similarity index 100% rename from src/AudioSep/models/CLAP/open_clip/version.py rename to src/v_audio_cc/AudioSep/models/CLAP/open_clip/version.py diff --git a/src/v_audio_cc/AudioSep/models/CLAP/training/__init__.py b/src/v_audio_cc/AudioSep/models/CLAP/training/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/AudioSep/models/CLAP/training/audioset_textmap.npy b/src/v_audio_cc/AudioSep/models/CLAP/training/audioset_textmap.npy similarity index 100% rename from src/AudioSep/models/CLAP/training/audioset_textmap.npy rename to src/v_audio_cc/AudioSep/models/CLAP/training/audioset_textmap.npy diff --git a/src/AudioSep/models/CLAP/training/data.py b/src/v_audio_cc/AudioSep/models/CLAP/training/data.py similarity index 100% rename from src/AudioSep/models/CLAP/training/data.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/data.py diff --git a/src/AudioSep/models/CLAP/training/distributed.py b/src/v_audio_cc/AudioSep/models/CLAP/training/distributed.py similarity index 100% rename from src/AudioSep/models/CLAP/training/distributed.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/distributed.py diff --git a/src/AudioSep/models/CLAP/training/imagenet_zeroshot_data.py b/src/v_audio_cc/AudioSep/models/CLAP/training/imagenet_zeroshot_data.py similarity index 100% rename from src/AudioSep/models/CLAP/training/imagenet_zeroshot_data.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/imagenet_zeroshot_data.py diff --git a/src/AudioSep/models/CLAP/training/infer_demo.py b/src/v_audio_cc/AudioSep/models/CLAP/training/infer_demo.py similarity index 100% rename from src/AudioSep/models/CLAP/training/infer_demo.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/infer_demo.py diff --git a/src/AudioSep/models/CLAP/training/logger.py b/src/v_audio_cc/AudioSep/models/CLAP/training/logger.py similarity index 100% rename from src/AudioSep/models/CLAP/training/logger.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/logger.py diff --git a/src/AudioSep/models/CLAP/training/lp_main.py b/src/v_audio_cc/AudioSep/models/CLAP/training/lp_main.py similarity index 100% rename from src/AudioSep/models/CLAP/training/lp_main.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/lp_main.py diff --git a/src/AudioSep/models/CLAP/training/lp_train.py b/src/v_audio_cc/AudioSep/models/CLAP/training/lp_train.py similarity index 100% rename from src/AudioSep/models/CLAP/training/lp_train.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/lp_train.py diff --git a/src/AudioSep/models/CLAP/training/main.py b/src/v_audio_cc/AudioSep/models/CLAP/training/main.py similarity index 100% rename from src/AudioSep/models/CLAP/training/main.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/main.py diff --git a/src/AudioSep/models/CLAP/training/params.py b/src/v_audio_cc/AudioSep/models/CLAP/training/params.py similarity index 100% rename from src/AudioSep/models/CLAP/training/params.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/params.py diff --git a/src/AudioSep/models/CLAP/training/scheduler.py b/src/v_audio_cc/AudioSep/models/CLAP/training/scheduler.py similarity index 100% rename from src/AudioSep/models/CLAP/training/scheduler.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/scheduler.py diff --git a/src/AudioSep/models/CLAP/training/train.py b/src/v_audio_cc/AudioSep/models/CLAP/training/train.py similarity index 100% rename from src/AudioSep/models/CLAP/training/train.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/train.py diff --git a/src/AudioSep/models/CLAP/training/zero_shot.py b/src/v_audio_cc/AudioSep/models/CLAP/training/zero_shot.py similarity index 100% rename from src/AudioSep/models/CLAP/training/zero_shot.py rename to src/v_audio_cc/AudioSep/models/CLAP/training/zero_shot.py diff --git a/src/AudioSep/models/audiosep.py b/src/v_audio_cc/AudioSep/models/audiosep.py similarity index 100% rename from src/AudioSep/models/audiosep.py rename to src/v_audio_cc/AudioSep/models/audiosep.py diff --git a/src/AudioSep/models/base.py b/src/v_audio_cc/AudioSep/models/base.py similarity index 100% rename from src/AudioSep/models/base.py rename to src/v_audio_cc/AudioSep/models/base.py diff --git a/src/AudioSep/models/clap_encoder.py b/src/v_audio_cc/AudioSep/models/clap_encoder.py similarity index 100% rename from src/AudioSep/models/clap_encoder.py rename to src/v_audio_cc/AudioSep/models/clap_encoder.py diff --git a/src/AudioSep/models/resunet.py b/src/v_audio_cc/AudioSep/models/resunet.py similarity index 100% rename from src/AudioSep/models/resunet.py rename to src/v_audio_cc/AudioSep/models/resunet.py diff --git a/src/AudioSep/optimizers/lr_schedulers.py b/src/v_audio_cc/AudioSep/optimizers/lr_schedulers.py similarity index 100% rename from src/AudioSep/optimizers/lr_schedulers.py rename to src/v_audio_cc/AudioSep/optimizers/lr_schedulers.py diff --git a/src/AudioSep/pipeline.py b/src/v_audio_cc/AudioSep/pipeline.py similarity index 100% rename from src/AudioSep/pipeline.py rename to src/v_audio_cc/AudioSep/pipeline.py diff --git a/src/AudioSep/predict.py b/src/v_audio_cc/AudioSep/predict.py similarity index 100% rename from src/AudioSep/predict.py rename to src/v_audio_cc/AudioSep/predict.py diff --git a/src/AudioSep/train.py b/src/v_audio_cc/AudioSep/train.py similarity index 100% rename from src/AudioSep/train.py rename to src/v_audio_cc/AudioSep/train.py diff --git a/src/AudioSep/utils.py b/src/v_audio_cc/AudioSep/utils.py similarity index 90% rename from src/AudioSep/utils.py rename to src/v_audio_cc/AudioSep/utils.py index 1ca65d6..fedb4f9 100644 --- a/src/AudioSep/utils.py +++ b/src/v_audio_cc/AudioSep/utils.py @@ -86,43 +86,43 @@ def get_audioset632_id_to_lb(ontology_path: str) -> Dict: return audioset632_id_to_lb -def load_pretrained_panns( - model_type: str, - checkpoint_path: str, - freeze: bool -) -> nn.Module: - r"""Load pretrained pretrained audio neural networks (PANNs). +# def load_pretrained_panns( +# model_type: str, +# checkpoint_path: str, +# freeze: bool +# ) -> nn.Module: +# r"""Load pretrained pretrained audio neural networks (PANNs). - Args: - model_type: str, e.g., "Cnn14" - checkpoint_path, str, e.g., "Cnn14_mAP=0.431.pth" - freeze: bool +# Args: +# model_type: str, e.g., "Cnn14" +# checkpoint_path, str, e.g., "Cnn14_mAP=0.431.pth" +# freeze: bool - Returns: - model: nn.Module - """ +# Returns: +# model: nn.Module +# """ - if model_type == "Cnn14": - Model = Cnn14 +# if model_type == "Cnn14": +# Model = Cnn14 - elif model_type == "Cnn14_DecisionLevelMax": - Model = Cnn14_DecisionLevelMax +# elif model_type == "Cnn14_DecisionLevelMax": +# Model = Cnn14_DecisionLevelMax - else: - raise NotImplementedError +# else: +# raise NotImplementedError - model = Model(sample_rate=32000, window_size=1024, hop_size=320, - mel_bins=64, fmin=50, fmax=14000, classes_num=527) +# model = Model(sample_rate=32000, window_size=1024, hop_size=320, +# mel_bins=64, fmin=50, fmax=14000, classes_num=527) - if checkpoint_path: - checkpoint = torch.load(checkpoint_path, map_location="cpu") - model.load_state_dict(checkpoint["model"]) +# if checkpoint_path: +# checkpoint = torch.load(checkpoint_path, map_location="cpu") +# model.load_state_dict(checkpoint["model"]) - if freeze: - for param in model.parameters(): - param.requires_grad = False +# if freeze: +# for param in model.parameters(): +# param.requires_grad = False - return model +# return model def energy(x): diff --git a/src/v_audio_cc/__init__.py b/src/v_audio_cc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/audio_classifier.py b/src/v_audio_cc/audio_classifier.py similarity index 100% rename from src/audio_classifier.py rename to src/v_audio_cc/audio_classifier.py diff --git a/src/audio_source_separator.py b/src/v_audio_cc/audio_source_separator.py similarity index 100% rename from src/audio_source_separator.py rename to src/v_audio_cc/audio_source_separator.py diff --git a/src/data_processing.py b/src/v_audio_cc/data_processing.py similarity index 100% rename from src/data_processing.py rename to src/v_audio_cc/data_processing.py diff --git a/src/generate_subtitles.py b/src/v_audio_cc/generate_subtitles.py similarity index 100% rename from src/generate_subtitles.py rename to src/v_audio_cc/generate_subtitles.py diff --git a/src/main.py b/src/v_audio_cc/main.py similarity index 100% rename from src/main.py rename to src/v_audio_cc/main.py diff --git a/src/pipeline.py b/src/v_audio_cc/pipeline.py similarity index 100% rename from src/pipeline.py rename to src/v_audio_cc/pipeline.py