Merge pull request #7 from fumiama/main

fix: 融合后的模型无法加载&优化colab笔记本
2025-04-30 08:38:59 +08:00 · 2023-04-10 10:32:35 +08:00 · 2023-04-10 10:32:35 +08:00 · 9a855e0457
commit 9a855e0457
parent 9e5f5edccc ffef3678cb
2 changed files with 131 additions and 112 deletions
--- a/Retrieval_based_Voice_Conversion_WebUI.ipynb
+++ b/Retrieval_based_Voice_Conversion_WebUI.ipynb
@@ -1,12 +1,30 @@
 {
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "private_outputs": true,
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU",
+    "gpuClass": "standard"
+  },
  "cells": [
    {
-      "attachments": {},
      "cell_type": "markdown",
-      "metadata": {},
      "source": [
        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
-      ]
+      ],
+      "metadata": {
+        "id": "ZFFCx5J80SGa"
+      }
    },
    {
      "cell_type": "code",
@@ -22,53 +40,48 @@
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "wjddIFr1oS3W"
-      },
-      "outputs": [],
      "source": [
        "#@title 安装依赖\n",
        "!apt-get -y install build-essential python3-dev ffmpeg\n",
        "!pip3 install --upgrade setuptools wheel\n",
        "!pip3 install --upgrade pip\n",
        "!pip3 install faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
-      ]
+      ],
+      "metadata": {
+        "id": "wjddIFr1oS3W"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "ge_97mfpgqTm"
-      },
-      "outputs": [],
      "source": [
        "#@title 克隆仓库\n",
        "\n",
        "!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
        "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
        "!mkdir -p pretrained uvr5_weights"
-      ]
+      ],
+      "metadata": {
+        "id": "ge_97mfpgqTm"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "BLDEZADkvlw1"
-      },
-      "outputs": [],
      "source": [
        "#@title 更新仓库（一般无需执行）\n",
        "!git pull"
-      ]
+      ],
+      "metadata": {
+        "id": "BLDEZADkvlw1"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "UG3XpUwEomUz"
-      },
-      "outputs": [],
      "source": [
        "#@title 下载底模\n",
        "!apt -y install -qq aria2\n",
@@ -89,47 +102,61 @@
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n",
        "\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
-      ]
+      ],
+      "metadata": {
+        "id": "UG3XpUwEomUz"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
+      "source": [
+        "#@title 挂载谷歌云盘\n",
+        "\n",
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
      "metadata": {
-        "id": "Mwk7Q0Loqzjx"
+        "id": "jwu07JgqoFON"
      },
-      "outputs": [],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
      "source": [
        "#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
        "\n",
        "#@markdown 数据集位置\n",
-        "DATASET = \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\"  #@param {type:\"string\"}\n",
+        "DATASET = \"/content/drive/MyDrive/dataset/lulucall_48k.zip\"  #@param {type:\"string\"}\n",
        "\n",
-        "from google.colab import drive\n",
-        "drive.mount('/content/drive')\n",
        "!mkdir -p /content/dataset\n",
        "!unzip -d /content/dataset {DATASET}"
-      ]
+      ],
+      "metadata": {
+        "id": "Mwk7Q0Loqzjx"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "7vh6vphDwO0b"
-      },
-      "outputs": [],
      "source": [
        "#@title 启动web\n",
        "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
+        "%load_ext tensorboard\n",
+        "%tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
        "!python3 infer-web.py --colab --pycmd python3"
-      ]
+      ],
+      "metadata": {
+        "id": "7vh6vphDwO0b"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "FgJuNeAwx5Y_"
-      },
-      "outputs": [],
      "source": [
        "#@title 手动将训练后的模型文件备份到谷歌云盘\n",
        "#@markdown 需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名\n",
@@ -137,7 +164,7 @@
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 模型epoch\n",
-        "MODELEPOCH = 6600  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 7500  #@param {type:\"integer\"}\n",
        "\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
@@ -145,15 +172,15 @@
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
        "\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
-      ]
+      ],
+      "metadata": {
+        "id": "FgJuNeAwx5Y_"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "OVQoLQJXS7WX"
-      },
-      "outputs": [],
      "source": [
        "#@title 从谷歌云盘恢复pth\n",
        "#@markdown 需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名\n",
@@ -161,7 +188,7 @@
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 模型epoch\n",
-        "MODELEPOCH = 250  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 6000  #@param {type:\"integer\"}\n",
        "\n",
        "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
        "\n",
@@ -170,72 +197,72 @@
        "!cp /content/drive/MyDrive/*.index /content/\n",
        "!cp /content/drive/MyDrive/*.npy /content/\n",
        "!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth"
-      ]
+      ],
+      "metadata": {
+        "id": "OVQoLQJXS7WX"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "ZKAyuKb9J6dz"
-      },
-      "outputs": [],
      "source": [
        "#@title 手动预处理（不推荐）\n",
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "\n",
-        "!python3 trainset_preprocess_pipeline_print.py /content/dataset 32000 8 logs/{MODELNAME} True\n"
-      ]
+        "!python3 trainset_preprocess_pipeline_print.py /content/dataset 48000 8 logs/{MODELNAME} True\n"
+      ],
+      "metadata": {
+        "id": "ZKAyuKb9J6dz"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "CrxJqzAUKmPJ"
-      },
-      "outputs": [],
      "source": [
        "#@title 手动提取特征（不推荐）\n",
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "\n",
        "!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n"
-      ]
+      ],
+      "metadata": {
+        "id": "CrxJqzAUKmPJ"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "IMLPLKOaKj58"
-      },
-      "outputs": [],
      "source": [
        "#@title 手动训练（不推荐）\n",
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 停止的epoch\n",
-        "MODELEPOCH = 700  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 2500  #@param {type:\"integer\"}\n",
        "#@markdown 保存epoch间隔\n",
-        "EPOCHSAVE = 20  #@param {type:\"integer\"}\n",
+        "EPOCHSAVE = 100  #@param {type:\"integer\"}\n",
        "#@markdown 采样率\n",
        "MODELSAMPLE = \"48k\"  #@param {type:\"string\"}\n",
        "\n",
        "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs 32 -g 0 -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l 0 -c 1\n"
-      ]
+      ],
+      "metadata": {
+        "id": "IMLPLKOaKj58"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "haYA81hySuDl"
-      },
-      "outputs": [],
      "source": [
        "#@title 删除其它pth，只留选中的（慎点，仔细看代码）\n",
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 选中模型epoch\n",
-        "MODELEPOCH = 6600  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 7700  #@param {type:\"integer\"}\n",
        "\n",
        "!echo \"备份选中的模型。。。\"\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
@@ -251,21 +278,21 @@
        "\n",
        "!echo \"删除完成\"\n",
        "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
-      ]
+      ],
+      "metadata": {
+        "id": "haYA81hySuDl"
+      },
+      "execution_count": null,
+      "outputs": []
    },
    {
      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "QhSiPTVPoIRh"
-      },
-      "outputs": [],
      "source": [
        "#@title 清除项目下所有文件，只留选中的模型（慎点，仔细看代码）\n",
        "#@markdown 模型名\n",
        "MODELNAME = \"lulu\"  #@param {type:\"string\"}\n",
        "#@markdown 选中模型epoch\n",
-        "MODELEPOCH = 1500  #@param {type:\"integer\"}\n",
+        "MODELEPOCH = 7700  #@param {type:\"integer\"}\n",
        "\n",
        "!echo \"备份选中的模型。。。\"\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
@@ -281,24 +308,12 @@
        "\n",
        "!echo \"删除完成\"\n",
        "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}"
-      ]
+      ],
+      "metadata": {
+        "id": "QhSiPTVPoIRh"
+      },
+      "execution_count": null,
+      "outputs": []
    }
-  ],
-  "metadata": {
-    "accelerator": "GPU",
-    "colab": {
-      "private_outputs": true,
-      "provenance": []
-    },
-    "gpuClass": "standard",
-    "kernelspec": {
-      "display_name": "Python 3",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python"
-    }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}
+  ]
+}
--- a/train/process_ckpt.py
+++ b/train/process_ckpt.py
@@ -69,10 +69,11 @@ def merge(path1,path2,alpha1,sr,f0,info,name):
            return opt
        ckpt1 = torch.load(path1, map_location="cpu")
        ckpt2 = torch.load(path2, map_location="cpu")
-        if("model"in ckpt1):ckpt1=extract(ckpt1)
-        else:ckpt1=ckpt1["weight"]
-        if("model"in ckpt2):ckpt2=extract(ckpt2)
-        else:ckpt2=ckpt2["weight"]
+        cfg = ckpt1["config"]
+        if("model"in ckpt1): ckpt1=extract(ckpt1)
+        else: ckpt1=ckpt1["weight"]
+        if("model"in ckpt2): ckpt2=extract(ckpt2)
+        else: ckpt2=ckpt2["weight"]
        if(sorted(list(ckpt1.keys()))!=sorted(list(ckpt2.keys()))):return "Fail to merge the models. The model architectures are not the same."
        opt = OrderedDict()
        opt["weight"] = {}
@@ -85,9 +86,12 @@ def merge(path1,path2,alpha1,sr,f0,info,name):
                    opt["weight"][key] = (alpha1*(ckpt1[key].float())+(1-alpha1)*(ckpt2[key].float())).half()
            # except:
            #     pdb.set_trace()
+        opt["config"] = cfg
+        '''
        if(sr=="40k"):opt["config"] = [1025, 32, 192, 192, 768, 2, 6, 3, 0, "1", [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 10, 2, 2], 512, [16, 16, 4, 4,4], 109, 256, 40000]
        elif(sr=="48k"):opt["config"] = [1025, 32, 192, 192, 768, 2, 6, 3, 0, "1", [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10,6,2,2,2], 512, [16, 16, 4, 4], 109, 256, 48000]
        elif(sr=="32k"):opt["config"] = [513, 32, 192, 192, 768, 2, 6, 3, 0, "1", [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 4, 2, 2, 2], 512, [16, 16, 4, 4,4], 109, 256, 32000]
+        '''
        opt["sr"]=sr
        opt["f0"]=1 if f0=="是"else 0
        opt["info"]=info