
Commit

Repo struct change and fix undefined names
kmazrolina committed Jun 16, 2024
1 parent bc82135 commit f97c677
Showing 99 changed files with 85 additions and 85 deletions.
23 files renamed without changes; 1 empty file.
@@ -780,63 +780,63 @@ def get_audio_embedding(self, data):

        return audio_embeds

-    def audio_infer(self, audio, hopsize=None, device=None):
-        """Forward one audio and produce the audio embedding
-        Parameters
-        ----------
-        audio: (audio_length)
-            the time-domain audio input, notice that it must be only one input
-        hopsize: int
-            the overlap hopsize as the sliding window
-        Returns
-        ----------
-        output_dict: {
-            key: [n, (embedding_shape)] if "HTS-AT"
-            or
-            key: [(embedding_shape)] if "PANN"
-        }
-            the list of key values of the audio branch
-        """
-
-        assert not self.training, "the inference mode must be run at eval stage"
-        output_dict = {}
-        # PANN
-        if self.audio_cfg.model_type == "PANN":
-            audio_input = audio.unsqueeze(dim=0)
-            output_dict[key] = self.encode_audio(audio_input, device=device)[
-                key
-            ].squeeze(dim=0)
-        elif self.audio_cfg.model_type == "HTSAT":
-            # repeat
-            audio_len = len(audio)
-            k = self.audio_cfg.clip_samples // audio_len
-            if k > 1:
-                audio = audio.repeat(k)
-                audio_len = len(audio)
-
-            if hopsize is None:
-                hopsize = min(hopsize, audio_len)
-
-            if audio_len > self.audio_cfg.clip_samples:
-                audio_input = [
-                    audio[pos : pos + self.audio_cfg.clip_samples].clone()
-                    for pos in range(
-                        0, audio_len - self.audio_cfg.clip_samples, hopsize
-                    )
-                ]
-                audio_input.append(audio[-self.audio_cfg.clip_samples :].clone())
-                audio_input = torch.stack(audio_input)
-                output_dict[key] = self.encode_audio(audio_input, device=device)[key]
-            else:
-                audio_input = audio.unsqueeze(dim=0)
-                output_dict[key] = self.encode_audio(audio_input, device=device)[
-                    key
-                ].squeeze(dim=0)
-
-        return output_dict
+# def audio_infer(self, audio, hopsize=None, device=None):
+# """Forward one audio and produce the audio embedding
+
+# Parameters
+# ----------
+# audio: (audio_length)
+# the time-domain audio input, notice that it must be only one input
+# hopsize: int
+# the overlap hopsize as the sliding window
+
+# Returns
+# ----------
+# output_dict: {
+# key: [n, (embedding_shape)] if "HTS-AT"
+# or
+# key: [(embedding_shape)] if "PANN"
+# }
+# the list of key values of the audio branch
+
+# """
+
+# assert not self.training, "the inference mode must be run at eval stage"
+# output_dict = {}
+# # PANN
+# if self.audio_cfg.model_type == "PANN":
+# audio_input = audio.unsqueeze(dim=0)
+# output_dict[key] = self.encode_audio(audio_input, device=device)[
+# key
+# ].squeeze(dim=0)
+# elif self.audio_cfg.model_type == "HTSAT":
+# # repeat
+# audio_len = len(audio)
+# k = self.audio_cfg.clip_samples // audio_len
+# if k > 1:
+# audio = audio.repeat(k)
+# audio_len = len(audio)
+
+# if hopsize is None:
+# hopsize = min(hopsize, audio_len)
+
+# if audio_len > self.audio_cfg.clip_samples:
+# audio_input = [
+# audio[pos : pos + self.audio_cfg.clip_samples].clone()
+# for pos in range(
+# 0, audio_len - self.audio_cfg.clip_samples, hopsize
+# )
+# ]
+# audio_input.append(audio[-self.audio_cfg.clip_samples :].clone())
+# audio_input = torch.stack(audio_input)
+# output_dict[key] = self.encode_audio(audio_input, device=device)[key]
+# else:
+# audio_input = audio.unsqueeze(dim=0)
+# output_dict[key] = self.encode_audio(audio_input, device=device)[
+# key
+# ].squeeze(dim=0)
+
+# return output_dict


def convert_weights_to_fp16(model: nn.Module):
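Note on the change above: the removed audio_infer indexes output_dict with a bare name key that is never assigned inside the function, and its guard `if hopsize is None: hopsize = min(hopsize, audio_len)` evaluates min(None, audio_len), which raises a TypeError. Commenting the method out is consistent with the commit message "fix undefined names". For reference, here is a minimal sketch of the same sliding-window idea written as a standalone helper; the name sliding_window_embed, the encode_audio callable, and the clip_samples parameter are illustrative assumptions, not code from this repository.

import torch

def sliding_window_embed(audio, encode_audio, clip_samples, hopsize=None, device=None):
    """Embed one 1-D waveform by slicing it into fixed-length windows (sketch)."""
    audio_len = len(audio)
    # Tile short clips so they fill at least one full window.
    k = clip_samples // max(audio_len, 1)
    if k > 1:
        audio = audio.repeat(k)
        audio_len = len(audio)
    # Default the hop to a full window; the original guard would have
    # evaluated min(None, audio_len) and failed.
    if hopsize is None:
        hopsize = clip_samples
    hopsize = min(hopsize, audio_len)
    if audio_len > clip_samples:
        windows = [
            audio[pos : pos + clip_samples].clone()
            for pos in range(0, audio_len - clip_samples, hopsize)
        ]
        windows.append(audio[-clip_samples:].clone())
        batch = torch.stack(windows)  # [n_windows, clip_samples]
    else:
        batch = audio.unsqueeze(dim=0)  # [1, audio_len]
    embeds = encode_audio(batch, device=device)
    # Return every entry instead of relying on an undefined `key`.
    return dict(embeds)

Called as sliding_window_embed(waveform, model.encode_audio, model.audio_cfg.clip_samples), this would return one tensor per key with a leading window dimension when the clip is long enough to be split, matching the [n, (embedding_shape)] shape described in the old docstring.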
14 files renamed without changes; 1 empty file.
56 changes: 28 additions & 28 deletions src/AudioSep/utils.py → src/v_audio_cc/AudioSep/utils.py
@@ -86,43 +86,43 @@ def get_audioset632_id_to_lb(ontology_path: str) -> Dict:
    return audioset632_id_to_lb


-def load_pretrained_panns(
-    model_type: str,
-    checkpoint_path: str,
-    freeze: bool
-) -> nn.Module:
-    r"""Load pretrained pretrained audio neural networks (PANNs).
+# def load_pretrained_panns(
+# model_type: str,
+# checkpoint_path: str,
+# freeze: bool
+# ) -> nn.Module:
+# r"""Load pretrained pretrained audio neural networks (PANNs).

-    Args:
-        model_type: str, e.g., "Cnn14"
-        checkpoint_path, str, e.g., "Cnn14_mAP=0.431.pth"
-        freeze: bool
+# Args:
+# model_type: str, e.g., "Cnn14"
+# checkpoint_path, str, e.g., "Cnn14_mAP=0.431.pth"
+# freeze: bool

-    Returns:
-        model: nn.Module
-    """
+# Returns:
+# model: nn.Module
+# """

-    if model_type == "Cnn14":
-        Model = Cnn14
+# if model_type == "Cnn14":
+# Model = Cnn14

-    elif model_type == "Cnn14_DecisionLevelMax":
-        Model = Cnn14_DecisionLevelMax
+# elif model_type == "Cnn14_DecisionLevelMax":
+# Model = Cnn14_DecisionLevelMax

-    else:
-        raise NotImplementedError
+# else:
+# raise NotImplementedError

-    model = Model(sample_rate=32000, window_size=1024, hop_size=320,
-        mel_bins=64, fmin=50, fmax=14000, classes_num=527)
+# model = Model(sample_rate=32000, window_size=1024, hop_size=320,
+# mel_bins=64, fmin=50, fmax=14000, classes_num=527)

-    if checkpoint_path:
-        checkpoint = torch.load(checkpoint_path, map_location="cpu")
-        model.load_state_dict(checkpoint["model"])
+# if checkpoint_path:
+# checkpoint = torch.load(checkpoint_path, map_location="cpu")
+# model.load_state_dict(checkpoint["model"])

-    if freeze:
-        for param in model.parameters():
-            param.requires_grad = False
+# if freeze:
+# for param in model.parameters():
+# param.requires_grad = False

-    return model
+# return model


def energy(x):
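Note on the change above: the removed load_pretrained_panns refers to Cnn14 and Cnn14_DecisionLevelMax, which appear to be among the undefined names this commit works around by commenting the function out. For reference, here is a minimal sketch of the same loading logic with the model classes passed in explicitly, so the function itself references nothing undefined; the registry argument and the name load_panns_checkpoint are illustrative assumptions rather than this repository's API.

from typing import Dict, Type

import torch
import torch.nn as nn

def load_panns_checkpoint(
    model_registry: Dict[str, Type[nn.Module]],
    model_type: str,
    checkpoint_path: str,
    freeze: bool,
) -> nn.Module:
    """Instantiate a PANNs model from a registry, then optionally load and freeze weights (sketch)."""
    if model_type not in model_registry:
        raise NotImplementedError(f"Unknown model_type: {model_type}")
    # Hyperparameters taken from the commented-out function above.
    model = model_registry[model_type](
        sample_rate=32000, window_size=1024, hop_size=320,
        mel_bins=64, fmin=50, fmax=14000, classes_num=527,
    )
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        model.load_state_dict(checkpoint["model"])
    if freeze:
        for param in model.parameters():
            param.requires_grad = False
    return model

A caller would supply something like {"Cnn14": Cnn14} from whichever module actually defines the PANNs architectures.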
Empty file added src/v_audio_cc/__init__.py
Empty file.
6 files renamed without changes.
