fix: MacOS 纯 CPU 推理时 Segmentation fault: 11

see: facebookresearch/faiss#2317 facebookresearch/faiss#2410
2025-04-10 14:48:58 +08:00 · 2023-04-10 18:28:39 +08:00 · 2023-04-10 18:28:39 +08:00 · ff1a711cad
commit ff1a711cad
parent 6c7c1d933f
5 changed files with 39 additions and 21 deletions
--- a/Retrieval_based_Voice_Conversion_WebUI.ipynb
+++ b/Retrieval_based_Voice_Conversion_WebUI.ipynb
@@ -129,10 +129,10 @@
        "#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
        "\n",
        "#@markdown 数据集位置\n",
-        "DATASET = \"/content/drive/MyDrive/dataset/lulucall_48k.zip\"  #@param {type:\"string\"}\n",
+        "DATASET = \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\"  #@param {type:\"string\"}\n",
        "\n",
        "!mkdir -p /content/dataset\n",
-        "!unzip -d /content/dataset {DATASET}"
+        "!unzip -d /content/dataset -B {DATASET}"
      ],
      "metadata": {
        "id": "Mwk7Q0Loqzjx"
@@ -140,13 +140,26 @@
      "execution_count": null,
      "outputs": []
    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title 重命名数据集中的重名文件\n",
+        "!ls -a /content/dataset/\n",
+        "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*"
+      ],
+      "metadata": {
+        "id": "PDlFxWHWEynD"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
    {
      "cell_type": "code",
      "source": [
        "#@title 启动web\n",
        "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
-        "%load_ext tensorboard\n",
-        "%tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
+        "# %load_ext tensorboard\n",
+        "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
        "!python3 infer-web.py --colab --pycmd python3"
      ],
      "metadata": {
@@ -164,7 +177,7 @@
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 模型epoch\n",
-        "MODELEPOCH = 7500  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 9600  #@param {type:\"integer\"}\n",
        "\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
@@ -188,7 +201,7 @@
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 模型epoch\n",
-        "MODELEPOCH = 6000  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 7500  #@param {type:\"integer\"}\n",
        "\n",
        "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
        "\n",
@@ -241,7 +254,7 @@
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 停止的epoch\n",
-        "MODELEPOCH = 2500  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 3200  #@param {type:\"integer\"}\n",
        "#@markdown 保存epoch间隔\n",
        "EPOCHSAVE = 100  #@param {type:\"integer\"}\n",
        "#@markdown 采样率\n",
@@ -262,7 +275,7 @@
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 选中模型epoch\n",
-        "MODELEPOCH = 7700  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 9600  #@param {type:\"integer\"}\n",
        "\n",
        "!echo \"备份选中的模型。。。\"\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
@@ -292,7 +305,7 @@
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 选中模型epoch\n",
-        "MODELEPOCH = 7700  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 9600  #@param {type:\"integer\"}\n",
        "\n",
        "!echo \"备份选中的模型。。。\"\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
--- a/infer-web.py
+++ b/infer-web.py
@@ -9,7 +9,7 @@ import faiss
 ncpu=cpu_count()
 ngpu=torch.cuda.device_count()
 gpu_infos=[]
-if(torch.cuda.is_available()==False or ngpu==0):if_gpu_ok=False
+if((not torch.cuda.is_available()) or ngpu==0):if_gpu_ok=False
 else:
    if_gpu_ok = False
    for i in range(ngpu):
@@ -140,7 +140,7 @@ def uvr(model_name,inp_root,save_root_vocal,paths,save_root_ins):
        except:
            traceback.print_exc()
        print("clean_empty_cache")
-        torch.cuda.empty_cache()
+        if torch.cuda.is_available(): torch.cuda.empty_cache()
    yield "\n".join(infos)

 #一个选项卡全局只能有一个音色
@@ -152,7 +152,7 @@ def get_vc(sid):
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model,tgt_sr#,cpt
            hubert_model = net_g=n_spk=vc=hubert_model=tgt_sr=None
-            torch.cuda.empty_cache()
+            if torch.cuda.is_available(): torch.cuda.empty_cache()
            ###楼下不这么折腾清理不干净
            if_f0 = cpt.get("f0", 1)
            if (if_f0 == 1):
@@ -160,7 +160,7 @@ def get_vc(sid):
            else:
                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
            del net_g,cpt
-            torch.cuda.empty_cache()
+            if torch.cuda.is_available(): torch.cuda.empty_cache()
            cpt=None
        return {"visible": False, "__type__": "update"}
    person = "%s/%s" % (weight_root, sid)
--- a/infer/infer-pm-index256.py
+++ b/infer/infer-pm-index256.py
@@ -104,7 +104,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):##
        "padding_mask": padding_mask.to(device),
        "output_layer": 9,  # layer 9
    }
-    torch.cuda.synchronize()
+    if torch.cuda.is_available(): torch.cuda.synchronize()
    t0=ttime()
    with torch.no_grad():
        logits = model.extract_features(**inputs)
@@ -116,13 +116,13 @@ for idx,name in enumerate(["冬之花clip1.wav",]):##
    feats = torch.from_numpy(big_npy[I.squeeze()].astype("float16")).unsqueeze(0).to(device)

    feats=F.interpolate(feats.permute(0,2,1),scale_factor=2).permute(0,2,1)
-    torch.cuda.synchronize()
+    if torch.cuda.is_available(): torch.cuda.synchronize()
    t1=ttime()
    # p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存
    p_len = min(feats.shape[1],10000)#
    pitch, pitchf = get_f0(audio, p_len,f0_up_key)
    p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存
-    torch.cuda.synchronize()
+    if torch.cuda.is_available(): torch.cuda.synchronize()
    t2=ttime()
    feats = feats[:,:p_len, :]
    pitch = pitch[:p_len]
@@ -133,7 +133,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):##
    pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device)
    with torch.no_grad():
        audio = net_g.infer(feats, p_len,pitch,pitchf,sid)[0][0, 0].data.cpu().float().numpy()#nsf
-    torch.cuda.synchronize()
+    if torch.cuda.is_available(): torch.cuda.synchronize()
    t3=ttime()
    ta0+=(t1-t0)
    ta1+=(t2-t1)
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ scipy==1.9.3
 librosa==0.9.2
 llvmlite==0.39.0
 fairseq==0.12.2
-faiss-cpu==1.7.2
+faiss-cpu==1.7.0
 gradio
 Cython
 future>=0.18.3
--- a/vc_infer_pipeline.py
+++ b/vc_infer_pipeline.py
@@ -72,6 +72,7 @@ class VC(object):
            "output_layer": 9,  # layer 9
        }
        t0 = ttime()
+        print("vc npy start time:", t0)
        with torch.no_grad():
            logits = model.extract_features(**inputs)
            feats  = model.final_proj(logits[0])
@@ -79,13 +80,14 @@ class VC(object):
        if(isinstance(index,type(None))==False and isinstance(big_npy,type(None))==False and index_rate!=0):
            npy = feats[0].cpu().numpy()
            if(self.is_half==True):npy=npy.astype("float32")
-            D, I = index.search(npy, 1)
+            _, I = index.search(npy, 1)
            npy=big_npy[I.squeeze()]
            if(self.is_half==True):npy=npy.astype("float16")
            feats = torch.from_numpy(npy).unsqueeze(0).to(self.device)*index_rate + (1-index_rate)*feats

        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
        t1 = ttime()
+        print("vc infer start time:", t1)
        p_len = audio0.shape[0]//self.window
        if(feats.shape[1]<p_len):
            p_len=feats.shape[1]
@@ -99,8 +101,9 @@ class VC(object):
            else:
                audio1 = (net_g.infer(feats, p_len, sid)[0][0, 0] * 32768).data.cpu().float().numpy().astype(np.int16)
        del feats,p_len,padding_mask
-        torch.cuda.empty_cache()
+        if torch.cuda.is_available(): torch.cuda.empty_cache()
        t2 = ttime()
+        print("vc infer end time:", t2)
        times[0] += (t1 - t0)
        times[2] += (t2 - t1)
        return audio1
@@ -125,6 +128,7 @@ class VC(object):
        audio_opt=[]
        t=None
        t1=ttime()
+        print("f0 start time:", t1)
        audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode='reflect')
        p_len=audio_pad.shape[0]//self.window
        inp_f0=None
@@ -146,6 +150,7 @@ class VC(object):
            pitch = torch.tensor(pitch,device=self.device).unsqueeze(0).long()
            pitchf = torch.tensor(pitchf,device=self.device).unsqueeze(0).float()
        t2=ttime()
+        print("f0 end time:", t2)
        times[1] += (t2 - t1)
        for t in opt_ts:
            t=t//self.window*self.window
@@ -160,5 +165,5 @@ class VC(object):
            audio_opt.append(self.vc(model,net_g,sid,audio_pad[t:],None,None,times,index,big_npy,index_rate)[self.t_pad_tgt:-self.t_pad_tgt])
        audio_opt=np.concatenate(audio_opt)
        del pitch,pitchf,sid
-        torch.cuda.empty_cache()
+        if torch.cuda.is_available(): torch.cuda.empty_cache()
        return audio_opt