From c2ab680e187d0f680caf88bbf903f0bbdaf9994c Mon Sep 17 00:00:00 2001 From: Zzzyt <37258146+Zzzzzzyt@users.noreply.github.com> Date: Thu, 11 Jan 2024 21:40:34 +0800 Subject: [PATCH] add is_half argument to extract_feature_print.py (#1683) * for GTX1650 * testing torch-profiler * no more profiler & change default audio * longer slice * fluid container * cache rmvpe and ui tweaks * get my changes back after merge * format code * only load rmvpe when necessary * fix rmvpe & config bug * fix is_half again * manual sync with upstream * revert other changes for pull request --- .github/workflows/unitest.yml | 2 +- Retrieval_based_Voice_Conversion_WebUI.ipynb | 2 +- Retrieval_based_Voice_Conversion_WebUI_v2.ipynb | 2 +- infer-web.py | 5 +++-- infer/modules/train/extract_feature_print.py | 13 +++++++------ 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/workflows/unitest.yml b/.github/workflows/unitest.yml index 904195c..9901f9f 100644 --- a/.github/workflows/unitest.yml +++ b/.github/workflows/unitest.yml @@ -33,4 +33,4 @@ jobs: python infer/modules/train/preprocess.py logs/mute/0_gt_wavs 48000 8 logs/mi-test True 3.7 touch logs/mi-test/extract_f0_feature.log python infer/modules/train/extract/extract_f0_print.py logs/mi-test $(nproc) pm - python infer/modules/train/extract_feature_print.py cpu 1 0 0 logs/mi-test v1 + python infer/modules/train/extract_feature_print.py cpu 1 0 0 logs/mi-test v1 True diff --git a/Retrieval_based_Voice_Conversion_WebUI.ipynb b/Retrieval_based_Voice_Conversion_WebUI.ipynb index 098168a..b38d8d2 100644 --- a/Retrieval_based_Voice_Conversion_WebUI.ipynb +++ b/Retrieval_based_Voice_Conversion_WebUI.ipynb @@ -290,7 +290,7 @@ "\n", "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n", "\n", - "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME}" + "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} True" ] }, { diff --git a/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb 
b/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb index 1eca9e1..0cad19f 100644 --- a/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb +++ b/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb @@ -309,7 +309,7 @@ "\n", "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n", "\n", - "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME}" + "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} True" ] }, { diff --git a/infer-web.py b/infer-web.py index 8c5f021..597bc87 100644 --- a/infer-web.py +++ b/infer-web.py @@ -344,7 +344,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp ps = [] for idx, n_g in enumerate(gpus): cmd = ( - '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s' + '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s %s' % ( config.python_cmd, config.device, @@ -354,6 +354,7 @@ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvp now_dir, exp_dir, version19, + config.is_half, ) ) logger.info(cmd) @@ -1517,4 +1518,4 @@ with gr.Blocks(title="RVC WebUI") as app: inbrowser=not config.noautoopen, server_port=config.listen_port, quiet=True, - ) + ) \ No newline at end of file diff --git a/infer/modules/train/extract_feature_print.py b/infer/modules/train/extract_feature_print.py index f8bfc2a..f39d3eb 100644 --- a/infer/modules/train/extract_feature_print.py +++ b/infer/modules/train/extract_feature_print.py @@ -8,14 +8,16 @@ os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0" device = sys.argv[1] n_part = int(sys.argv[2]) i_part = int(sys.argv[3]) -if len(sys.argv) == 6: +if len(sys.argv) == 7: exp_dir = sys.argv[4] version = sys.argv[5] + is_half = sys.argv[6] == "True" else: i_gpu = sys.argv[4] exp_dir = sys.argv[5] os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu) version = sys.argv[6] + is_half = sys.argv[7] == "True" import fairseq import numpy as np import soundfile as sf @@ -91,8 +93,9 @@ models, saved_cfg, task = 
fairseq.checkpoint_utils.load_model_ensemble_and_task( model = models[0] model = model.to(device) printt("move model to %s" % device) -if device not in ["mps", "cpu"]: - model = model.half() +if is_half: + if device not in ["mps", "cpu"]: + model = model.half() model.eval() todo = sorted(list(os.listdir(wavPath)))[i_part::n_part] @@ -113,9 +116,7 @@ else: feats = readwave(wav_path, normalize=saved_cfg.task.normalize) padding_mask = torch.BoolTensor(feats.shape).fill_(False) inputs = { - "source": feats.half().to(device) - if device not in ["mps", "cpu"] - else feats.to(device), + "source": feats.half().to(device) if is_half and device not in ["mps", "cpu"] else feats.to(device), "padding_mask": padding_mask.to(device), "output_layer": 9 if version == "v1" else 12, # layer 9 }