optimize: 精简未用到的配置项并在特征提取初步引入mps (#32)

2025-05-20 02:39:05 +08:00 · 2023-04-11 18:14:55 +08:00 · 2023-04-11 18:14:55 +08:00 · ecc744d748
commit ecc744d748
parent 0656591373
10 changed files with 82 additions and 57 deletions
--- a/config.py
+++ b/config.py
@ -1,3 +1,20 @@
+########################硬件参数########################
+
+#填写cuda:x, cpu 或 mps, x指代第几张卡，只支持 N卡 / Apple Silicon 加速
+device  =   "cuda:0"
+
+#9-10-20-30-40系显卡无脑True，不影响质量，>=20显卡开启有加速
+is_half =   True
+
+#默认0用上所有线程，写数字限制CPU资源使用  
+n_cpu   =   0
+
+########################硬件参数########################
+
+
+##################下为参数处理逻辑，勿动##################
+
+########################命令行参数########################
 import argparse
 parser = argparse.ArgumentParser()
 parser.add_argument("--port", type=int, default=7865, help="Listen port")
@ -5,34 +22,48 @@ parser.add_argument("--pycmd", type=str, default="python", help="Python command"
 parser.add_argument("--colab", action='store_true', help="Launch in colab")
 parser.add_argument("--noparallel", action='store_true', help="Disable parallel processing")
 cmd_opts = parser.parse_args()
-############离线VC参数
-inp_root=r"白鹭霜华长条"#对输入目录下所有音频进行转换，别放非音频文件
-opt_root=r"opt"#输出目录
-f0_up_key=0#升降调，整数，男转女12，女转男-12
-person=r"weights\洛天依v3.pt"#目前只有洛天依v3
-############硬件参数
-device = "cuda:0"#填写cuda:x或cpu，x指代第几张卡，只支持N卡加速
-is_half=True#9-10-20-30-40系显卡无脑True，不影响质量，>=20显卡开启有加速
-n_cpu=0#默认0用上所有线程，写数字限制CPU资源使用
-############python命令路径
+
 python_cmd=cmd_opts.pycmd
 listen_port=cmd_opts.port
 iscolab=cmd_opts.colab
 noparallel=cmd_opts.noparallel
-############下头别动
+########################命令行参数########################
+
+import sys
 import torch
-if(torch.cuda.is_available()==False):
-    print("没有发现支持的N卡, 使用CPU进行推理")
-    device="cpu"
-    is_half=False
-if(device!="cpu"):
-    gpu_name=torch.cuda.get_device_name(int(device.split(":")[-1]))
-    if("16"in gpu_name or "MX"in gpu_name):
+
+# has_mps is only available in nightly PyTorch (for now) and on macOS 12.3+.
+# Probe the attribute with `getattr`, then try moving a tensor to "mps", for compatibility.
+def has_mps() -> bool:
+    if sys.platform != "darwin":
+        return False
+    else:
+        if not getattr(torch, 'has_mps', False): return False
+        try:
+            torch.zeros(1).to(torch.device("mps"))
+            return True
+        except Exception:
+            return False
+
+
+if(not torch.cuda.is_available()):
+    if has_mps():
+        print("没有发现支持的N卡, 使用MPS进行推理")
+        device  = "mps"
+    else:
+        print("没有发现支持的N卡, 使用CPU进行推理")
+        device  = "cpu"
+        is_half = False
+
+if(device not in ["cpu", "mps"]):
+    gpu_name = torch.cuda.get_device_name(int(device.split(":")[-1]))
+    if("16" in gpu_name or "MX" in gpu_name):
        print("16系显卡/MX系显卡强制单精度")
-        is_half=False
+        is_half = False
+
 from multiprocessing import cpu_count
-if(n_cpu==0):n_cpu=cpu_count()
-if(is_half==True):
+if(n_cpu==0): n_cpu=cpu_count()
+if(is_half):
    #6G显存配置
    x_pad       =   3
    x_query     =   10
@ -41,10 +72,6 @@ if(is_half==True):
 else:
    #5G显存配置
    x_pad       =   1
-    # x_query     =   6
-    # x_center    =   30
-    # x_max       =   32
-    #6G显存配置
    x_query     =   6
    x_center    =   38
    x_max       =   41
--- a/extract_feature_print.py
+++ b/extract_feature_print.py
@ -1,13 +1,12 @@
 import os,sys,traceback
-if len(sys.argv) == 4:
-    n_part=int(sys.argv[1])
-    i_part=int(sys.argv[2])
-    exp_dir=sys.argv[3]
-else:
-    n_part=int(sys.argv[1])
-    i_part=int(sys.argv[2])
-    i_gpu=sys.argv[3]
+device=sys.argv[1]
+n_part=int(sys.argv[2])
+i_part=int(sys.argv[3])
+if len(sys.argv) == 5:
    exp_dir=sys.argv[4]
+else:
+    i_gpu=sys.argv[4]
+    exp_dir=sys.argv[5]
    os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)

 import torch
@ -15,7 +14,6 @@ import torch.nn.functional as F
 import soundfile as sf
 import numpy as np
 from fairseq import checkpoint_utils
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

 f = open("%s/extract_f0_feature.log"%exp_dir, "a+")
 def printt(strr):
@ -50,8 +48,8 @@ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
 )
 model = models[0]
 model = model.to(device)
-if torch.cuda.is_available():
-    model = model.half()
+printt("move model to "+device)
+if device != "cpu": model = model.half()
 model.eval()

 todo=sorted(list(os.listdir(wavPath)))[i_part::n_part]
@ -70,7 +68,7 @@ else:
                feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
                padding_mask = torch.BoolTensor(feats.shape).fill_(False)
                inputs = {
-                    "source": feats.half().to(device) if torch.cuda.is_available() else feats.to(device),
+                    "source": feats.half().to(device) if device != "cpu" else feats.to(device),
                    "padding_mask": padding_mask.to(device),
                    "output_layer": 9,  # layer 9
                }
--- a/infer-web.py
+++ b/infer-web.py
@ -36,7 +36,7 @@ from fairseq import checkpoint_utils
 import gradio as gr
 import logging
 from vc_infer_pipeline import VC
-from config import is_half,device,is_half,python_cmd,listen_port,iscolab,noparallel
+from config import is_half,device,python_cmd,listen_port,iscolab,noparallel
 from infer_uvr5 import _audio_pre_
 from my_utils import load_audio
 from train.process_ckpt import show_info,change_info,merge,extract_small_model
@ -53,7 +53,7 @@ class ToolButton(gr.Button, gr.components.FormComponent):
 hubert_model=None
 def load_hubert():
    global hubert_model
-    models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"],suffix="",)
+    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"],suffix="",)
    hubert_model = models[0]
    hubert_model = hubert_model.to(device)
    if(is_half):hubert_model = hubert_model.half()
@ -79,7 +79,7 @@ def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,file_big_np
        if(hubert_model==None):load_hubert()
        if_f0 = cpt.get("f0", 1)
        audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,file_big_npy,index_rate,if_f0,f0_file=f0_file)
-        print("npy: ", times[0], "s, f0:", times[1], "s, infer: ", times[2], "s", sep='')
+        print("npy: ", times[0], "s, f0: ", times[1], "s, infer: ", times[2], "s", sep='')
        return "Success", (tgt_sr, audio_opt)
    except:
        info=traceback.format_exc()
@ -267,7 +267,7 @@ def extract_f0_feature(gpus,n_p,f0method,if_f0,exp_dir):
    leng=len(gpus)
    ps=[]
    for idx,n_g in enumerate(gpus):
-        cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir)
+        cmd=python_cmd + " extract_feature_print.py %s %s %s %s %s/logs/%s"%(device,leng,idx,n_g,now_dir,exp_dir)
        print(cmd)
        p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
        ps.append(p)
@ -382,7 +382,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
    leng=len(gpus)
    ps=[]
    for idx,n_g in enumerate(gpus):
-        cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1)
+        cmd=python_cmd + " extract_feature_print.py %s %s %s %s %s/logs/%s"%(device,leng,idx,n_g,now_dir,exp_dir1)
        yield get_info_str(cmd)
        p = Popen(cmd, shell=True, cwd=now_dir)#, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
        ps.append(p)
--- a/infer_pack/models.py
+++ b/infer_pack/models.py
@ -345,7 +345,7 @@ class SourceModuleHnNSF(torch.nn.Module):

    def forward(self, x,upp=None):
        sine_wavs, uv, _ = self.l_sin_gen(x,upp)
-        if(self.is_half==True):sine_wavs=sine_wavs.half()
+        if(self.is_half):sine_wavs=sine_wavs.half()
        sine_merge = self.l_tanh(self.l_linear(sine_wavs))
        return sine_merge,None,None# noise, uv
 class GeneratorNSF(torch.nn.Module):
--- a/infer_pack/models_onnx.py
+++ b/infer_pack/models_onnx.py
@ -345,7 +345,7 @@ class SourceModuleHnNSF(torch.nn.Module):

    def forward(self, x,upp=None):
        sine_wavs, uv, _ = self.l_sin_gen(x,upp)
-        if(self.is_half==True):sine_wavs=sine_wavs.half()
+        if(self.is_half):sine_wavs=sine_wavs.half()
        sine_merge = self.l_tanh(self.l_linear(sine_wavs))
        return sine_merge,None,None# noise, uv
 class GeneratorNSF(torch.nn.Module):
--- a/infer_uvr5.py
+++ b/infer_uvr5.py
@ -39,7 +39,7 @@ class  _audio_pre_():
        cpk = torch.load( model_path , map_location='cpu')  
        model.load_state_dict(cpk)
        model.eval()
-        if(is_half==True):model = model.half().to(device)
+        if(is_half):model = model.half().to(device)
        else:model = model.to(device)

        self.mp = mp
--- a/slicer2.py
+++ b/slicer2.py
@ -182,4 +182,4 @@ def main():


 if __name__ == '__main__':
-    main()
+    main()
--- a/trainset_preprocess_pipeline_print.py
+++ b/trainset_preprocess_pipeline_print.py
@ -40,7 +40,7 @@ class PreProcess():
        os.makedirs(self.gt_wavs_dir,exist_ok=True)
        os.makedirs(self.wavs16k_dir,exist_ok=True)

-    def print(self, strr):
+    def println(self, strr):
        mutex.acquire()
        print(strr)
        self.f.write("%s\n" % strr)
@ -70,9 +70,9 @@ class PreProcess():
                        tmp_audio = audio[start:]
                        break
                self.norm_write(tmp_audio, idx0, idx1)
-            self.print("%s->Suc."%path)
+            self.println("%s->Suc."%path)
        except:
-            self.print("%s->%s"%(path,traceback.format_exc()))
+            self.println("%s->%s"%(path,traceback.format_exc()))

    def pipeline_mp(self,infos):
        for path, idx0 in infos:
@ -91,14 +91,14 @@ class PreProcess():
                    ps.append(p)
                    for p in ps:p.join()
        except:
-            self.print("Fail. %s"%traceback.format_exc())
+            self.println("Fail. %s"%traceback.format_exc())

 def preprocess_trainset(inp_root, sr, n_p, exp_dir):
    pp=PreProcess(sr,exp_dir)
-    pp.print("start preprocess")
-    pp.print(sys.argv)
+    pp.println("start preprocess")
+    pp.println(sys.argv)
    pp.pipeline_mp_inp_dir(inp_root,n_p)
-    pp.print("end preprocess")
+    pp.println("end preprocess")

 if __name__=='__main__':
    preprocess_trainset(inp_root, sr, n_p, exp_dir)
--- a/uvr5_pack/utils.py
+++ b/uvr5_pack/utils.py
@ -27,7 +27,7 @@ def inference(X_spec, device, model, aggressiveness,data):
                start = i * roi_size
                X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']]
                X_mag_window = torch.from_numpy(X_mag_window)
-                if(is_half==True):X_mag_window=X_mag_window.half()
+                if(is_half):X_mag_window=X_mag_window.half()
                X_mag_window=X_mag_window.to(device)

                pred = model.predict(X_mag_window, aggressiveness)
--- a/vc_infer_pipeline.py
+++ b/vc_infer_pipeline.py
@ -58,7 +58,7 @@ class VC(object):

    def vc(self,model,net_g,sid,audio0,pitch,pitchf,times,index,big_npy,index_rate):#,file_index,file_big_npy
        feats = torch.from_numpy(audio0)
-        if(self.is_half==True):feats=feats.half()
+        if(self.is_half):feats=feats.half()
        else:feats=feats.float()
        if feats.dim() == 2:  # double channels
            feats = feats.mean(-1)
@ -78,10 +78,10 @@ class VC(object):

        if(isinstance(index,type(None))==False and isinstance(big_npy,type(None))==False and index_rate!=0):
            npy = feats[0].cpu().numpy()
-            if(self.is_half==True):npy=npy.astype("float32")
+            if(self.is_half):npy=npy.astype("float32")
            _, I = index.search(npy, 1)
            npy=big_npy[I.squeeze()]
-            if(self.is_half==True):npy=npy.astype("float16")
+            if(self.is_half):npy=npy.astype("float16")
            feats = torch.from_numpy(npy).unsqueeze(0).to(self.device)*index_rate + (1-index_rate)*feats

        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)