Mirror of https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git (synced 2024-12-29 19:15:04 +08:00)

Commit 4b039b098b
.gitignore (vendored, new file, 6 lines)

@@ -0,0 +1,6 @@
.DS_Store
__pycache__
/TEMP
*.pyd
hubert_base.pt
/logs
README.md

@@ -1,5 +1,7 @@
# Retrieval-based-Voice-Conversion-WebUI

+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
+
The 2 missing folders and 2 files:

hubert_base.pt
Retrieval_based_Voice_Conversion_WebUI.ipynb (new file, 211 lines)

@@ -0,0 +1,211 @@
{
  "cells": [
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "GmFP6bN9dvOq"
      },
      "outputs": [],
      "source": [
        "#@title Check the GPU\n",
        "!nvidia-smi"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "wjddIFr1oS3W"
      },
      "outputs": [],
      "source": [
        "#@title Install dependencies\n",
        "!apt-get -y install build-essential python3-dev ffmpeg\n",
        "!pip3 install --upgrade setuptools wheel\n",
        "!pip3 install --upgrade pip\n",
        "!pip3 install faiss-gpu fairseq gradio ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ge_97mfpgqTm"
      },
      "outputs": [],
      "source": [
        "#@title Clone the repository\n",
        "\n",
        "!git clone --depth=1 https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n",
        "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
        "!mkdir -p pretrained uvr5_weights"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "BLDEZADkvlw1"
      },
      "outputs": [],
      "source": [
        "#@title Update the repository (usually not needed)\n",
        "!git pull"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "UG3XpUwEomUz"
      },
      "outputs": [],
      "source": [
        "!apt -y install -qq aria2\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n",
        "\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth\n",
        "\n",
        "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Mwk7Q0Loqzjx"
      },
      "outputs": [],
      "source": [
        "#@title Load the packed dataset from Google Drive into /content/dataset\n",
        "\n",
        "#@markdown Dataset location\n",
        "DATASET = \"/content/drive/MyDrive/dataset/lulu20230327.zip\" #@param {type:\"string\"}\n",
        "\n",
        "from google.colab import drive\n",
        "drive.mount('/content/drive')\n",
        "!mkdir -p /content/dataset\n",
        "!unzip -d /content/dataset {DATASET}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "7vh6vphDwO0b"
      },
      "outputs": [],
      "source": [
        "#@title Launch the web UI\n",
        "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
        "!python3 infer-web.py --colab --pycmd python3"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "FgJuNeAwx5Y_"
      },
      "outputs": [],
      "source": [
        "#@title Manually back up trained model files to Google Drive\n",
        "#@markdown Check the model file names under the logs folder yourself and edit the file names at the end of the commands below accordingly\n",
        "\n",
        "#@markdown Model name\n",
        "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
        "#@markdown Model epoch\n",
        "MODELEPOCH = 3540 #@param {type:\"integer\"}\n",
        "\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n",
        "\n",
        "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "OVQoLQJXS7WX"
      },
      "outputs": [],
      "source": [
        "#@title Restore the pth files from Google Drive\n",
        "#@markdown Check the model file names under the logs folder yourself and edit the file names at the end of the commands below accordingly\n",
        "\n",
        "#@markdown Model name\n",
        "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
        "#@markdown Model epoch\n",
        "MODELEPOCH = 730 #@param {type:\"integer\"}\n",
        "\n",
        "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n",
        "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ZKAyuKb9J6dz"
      },
      "outputs": [],
      "source": [
        "#@title Manual training (not recommended)\n",
        "#@markdown Model name\n",
        "MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
        "\n",
        "!python3 trainset_preprocess_pipeline_print.py /content/dataset 32000 8 logs/{MODELNAME} True\n",
        "\n",
        "!python3 extract_feature_print.py 1 0 0 logs/{MODELNAME}\n",
        "\n",
        "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr 32k -f0 0 -g 0 -bs 4 -te 10 -se 5 -pg pretrained/G32k.pth -pd pretrained/D32k.pth -l 0 -c 0\n"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "private_outputs": true,
      "provenance": []
    },
    "gpuClass": "standard",
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
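The download cell above repeats one aria2c invocation per checkpoint. For reference only, a minimal Python sketch of the same pattern; the file names and Hugging Face base URL are taken from the cell above, while the loop and helper function are illustrative and not part of the notebook:

import subprocess

# Base URL and destination as used in the notebook cell above.
BASE = "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main"
DEST = "/content/Retrieval-based-Voice-Conversion-WebUI"

pretrained = [f"{kind}{sr}.pth" for kind in ("D", "G", "f0D", "f0G") for sr in ("32k", "40k", "48k")]

def fetch(url: str, out_dir: str, out_name: str) -> None:
    # -c resumes partial downloads; -x/-s open 16 connections; -k sets the chunk size.
    subprocess.run(
        ["aria2c", "--console-log-level=error", "-c", "-x", "16", "-s", "16",
         "-k", "1M", url, "-d", out_dir, "-o", out_name],
        check=True,
    )

for name in pretrained:
    fetch(f"{BASE}/pretrained/{name}", f"{DEST}/pretrained", name)
fetch(f"{BASE}/hubert_base.pt", DEST, "hubert_base.pt")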
config.py (14 lines changed)

@@ -1,3 +1,10 @@
+import argparse
+parser = argparse.ArgumentParser()
+parser.add_argument("--port", type=int, default=7865, help="Listen port")
+parser.add_argument("--pycmd", type=str, default="python", help="Python command")
+parser.add_argument("--colab", action='store_true', help="Launch in colab")
+parser.add_argument("--noparallel", action='store_true', help="Disable parallel processing")
+cmd_opts = parser.parse_args()
############ Offline VC parameters
inp_root=r"白鹭霜华长条"  # every audio file under this input directory is converted; don't put non-audio files here
opt_root=r"opt"  # output directory

@@ -7,10 +14,15 @@ person=r"weights\洛天依v3.pt"  # currently only 洛天依v3
device = "cuda:0"  # "cuda:x" or "cpu"; x is the GPU index; only NVIDIA cards are accelerated
is_half=True  # simply leave True on 9/10/20/30/40-series cards; does not affect quality; >=20-series cards get a speedup
n_cpu=0  # 0 (default) uses all threads; set a number to limit CPU usage
+############ Python command path
+python_cmd=cmd_opts.pycmd
+listen_port=cmd_opts.port
+iscolab=cmd_opts.colab
+noparallel=cmd_opts.noparallel
############ Don't touch anything below this line
import torch
if(torch.cuda.is_available()==False):
-    print("没有发现支持的N卡,使用CPU进行推理")
+    print("没有发现支持的N卡, 使用CPU进行推理")
    device="cpu"
    is_half=False
if(device!="cpu"):
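For context, the block added above makes config.py parse its command-line options at import time and expose them as module-level names. A minimal standalone sketch of that pattern follows; the variable names mirror the diff, but the file name cli_config.py and the __main__ demo are illustrative, not the repo file itself:

# cli_config.py -- hypothetical stand-in for the argparse block added to config.py
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--port", type=int, default=7865, help="Listen port")
parser.add_argument("--pycmd", type=str, default="python", help="Python command")
parser.add_argument("--colab", action="store_true", help="Launch in colab")
parser.add_argument("--noparallel", action="store_true", help="Disable parallel processing")
cmd_opts = parser.parse_args()

# Module-level names other files can import, the way infer-web.py does in this commit.
python_cmd = cmd_opts.pycmd
listen_port = cmd_opts.port
iscolab = cmd_opts.colab
noparallel = cmd_opts.noparallel

if __name__ == "__main__":
    # e.g. `python cli_config.py --colab --pycmd python3` prints the resolved options
    print(python_cmd, listen_port, iscolab, noparallel)

Because parsing happens at import time, the flags apply to whichever script imports the module, which is how `infer-web.py --colab --pycmd python3` (as the notebook launches it) reaches these values.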
extract_feature_print.py

@@ -1,17 +1,20 @@
import os,sys,traceback
-n_part=int(sys.argv[1])
-i_part=int(sys.argv[2])
-i_gpu=sys.argv[3]
-exp_dir=sys.argv[4]
-os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
+if len(sys.argv) == 4:
+    n_part=int(sys.argv[1])
+    i_part=int(sys.argv[2])
+    exp_dir=sys.argv[3]
+else:
+    n_part=int(sys.argv[1])
+    i_part=int(sys.argv[2])
+    i_gpu=sys.argv[3]
+    exp_dir=sys.argv[4]
+    os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)

import torch
import torch.nn.functional as F
import soundfile as sf
import numpy as np
import joblib
from fairseq import checkpoint_utils
import pdb
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

f = open("%s/extract_f0_feature.log"%exp_dir, "a+")

@@ -48,7 +51,8 @@ models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
)
model = models[0]
model = model.to(device)
-model = model.half()
+if torch.cuda.is_available():
+    model = model.half()
model.eval()

todo=sorted(list(os.listdir(wavPath)))[i_part::n_part]

@@ -67,7 +71,7 @@ else:
            feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
            padding_mask = torch.BoolTensor(feats.shape).fill_(False)
            inputs = {
-                "source": feats.half().to(device),
+                "source": feats.half().to(device) if torch.cuda.is_available() else feats.to(device),
                "padding_mask": padding_mask.to(device),
                "output_layer": 9,  # layer 9
            }
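The changes above let the feature extractor run with or without a GPU by picking the device at runtime and only using half precision on CUDA. A minimal sketch of that fallback pattern, with an illustrative toy module and tensor rather than the HuBERT model from the script:

import torch

def prepare(model: torch.nn.Module, feats: torch.Tensor):
    """Move a model and its input to CUDA with fp16 when available, else stay on CPU in fp32."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    if torch.cuda.is_available():
        model = model.half()          # fp16 is faster on recent GPUs
        feats = feats.half().to(device)
    else:
        feats = feats.to(device)      # CPU inference stays in fp32
    return model.eval(), feats

# Example with a toy module and a dummy feature tensor.
model, feats = prepare(torch.nn.Linear(10, 10), torch.randn(1, 10))
with torch.no_grad():
    out = model(feats)
print(out.dtype, out.device)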
infer-web.py (55 lines changed)

@@ -1,9 +1,10 @@
from multiprocessing import cpu_count
import threading
-from time import sleep
-from subprocess import Popen,PIPE,run as runn
+from subprocess import Popen
+from time import sleep
-import torch, pdb, os,traceback,sys,warnings,shutil,numpy as np,faiss
+import torch, os,traceback,sys,warnings,shutil,numpy as np
+import faiss
# check whether there is an NVIDIA card usable for training and accelerated inference
ncpu=cpu_count()
ngpu=torch.cuda.device_count()

@@ -33,11 +34,9 @@ from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFs
from scipy.io import wavfile
from fairseq import checkpoint_utils
import gradio as gr
-import librosa
import logging
from vc_infer_pipeline import VC
-import soundfile as sf
-from config import is_half,device,is_half
+from config import is_half,device,is_half,python_cmd,listen_port,iscolab,noparallel
from infer_uvr5 import _audio_pre_
from my_utils import load_audio
from train.process_ckpt import show_info,change_info,merge,extract_small_model

@@ -64,9 +63,11 @@ def load_hubert():
weight_root="weights"
weight_uvr5_root="uvr5_weights"
names=[]
-for name in os.listdir(weight_root):names.append(name)
+for name in os.listdir(weight_root):
+    if name.endswith(".pth"): names.append(name)
uvr5_names=[]
-for name in os.listdir(weight_uvr5_root):uvr5_names.append(name.replace(".pth",""))
+for name in os.listdir(weight_uvr5_root):
+    if name.endswith(".pth"): uvr5_names.append(name.replace(".pth",""))

def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,file_big_npy,index_rate):  # spk_item, input_audio0, vc_transform0,f0_file,f0method0
    global tgt_sr,net_g,vc,hubert_model

@@ -180,7 +181,11 @@ def get_vc(sid):
    n_spk=cpt["config"][-3]
    return {"visible": True,"maximum": n_spk, "__type__": "update"}

-def change_choices():return {"choices": sorted(list(os.listdir(weight_root))), "__type__": "update"}
+def change_choices():
+    names=[]
+    for name in os.listdir(weight_root):
+        if name.endswith(".pth"): names.append(name)
+    return {"choices": sorted(names), "__type__": "update"}
def clean():return {"value": "", "__type__": "update"}
def change_f0(if_f0_3,sr2):  # np7, f0method8,pretrained_G14,pretrained_D15
    if(if_f0_3=="是"):return {"visible": True, "__type__": "update"},{"visible": True, "__type__": "update"},"pretrained/f0G%s.pth"%sr2,"pretrained/f0D%s.pth"%sr2

@@ -217,7 +222,7 @@ def preprocess_dataset(trainset_dir,exp_dir,sr,n_p=ncpu):
    os.makedirs("%s/logs/%s"%(now_dir,exp_dir),exist_ok=True)
    f = open("%s/logs/%s/preprocess.log"%(now_dir,exp_dir), "w")
    f.close()
-    cmd="python trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s"%(trainset_dir,sr,n_p,now_dir,exp_dir)
+    cmd=python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "%(trainset_dir,sr,n_p,now_dir,exp_dir)+str(noparallel)
    print(cmd)
    p = Popen(cmd, shell=True)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
    ### annoying gradio: Popen reads only return everything after the process finishes; without gradio output is read line by line, so a separate text stream has to be polled on a timer

@@ -237,7 +242,7 @@ def extract_f0_feature(gpus,n_p,f0method,if_f0,exp_dir):
    f = open("%s/logs/%s/extract_f0_feature.log"%(now_dir,exp_dir), "w")
    f.close()
    if(if_f0=="是"):
-        cmd="python extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir,n_p,f0method)
+        cmd=python_cmd + " extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir,n_p,f0method)
        print(cmd)
        p = Popen(cmd, shell=True,cwd=now_dir)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE
        ### annoying gradio: Popen reads only return everything after the process finishes; without gradio output is read line by line, so a separate text stream has to be polled on a timer

@@ -261,7 +266,7 @@
    leng=len(gpus)
    ps=[]
    for idx,n_g in enumerate(gpus):
-        cmd="python extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir)
+        cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir)
        print(cmd)
        p = Popen(cmd, shell=True, cwd=now_dir)  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
        ps.append(p)

@@ -300,8 +305,12 @@ def click_train(exp_dir1,sr2,if_f0_3,spk_id5,save_epoch10,total_epoch11,batch_si
    with open("%s/filelist.txt"%exp_dir,"w")as f:f.write("\n".join(opt))
    print("write filelist done")
    # generate config  # no need to generate config
-    # cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
-    cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
+    # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
+    print("use gpus:",gpus16)
+    if gpus16:
+        cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
+    else:
+        cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
    print(cmd)
    p = Popen(cmd, shell=True, cwd=now_dir)
    p.wait()

@@ -346,7 +355,7 @@ def train1key(exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0meth
    os.makedirs("%s/logs/%s"%(now_dir,exp_dir1),exist_ok=True)
    ######### step1: process the dataset
    open("%s/logs/%s/preprocess.log"%(now_dir,exp_dir1), "w").close()
-    cmd="python trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s"%(trainset_dir4,sr_dict[sr2],ncpu,now_dir,exp_dir1)
+    cmd=python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "%(trainset_dir4,sr_dict[sr2],ncpu,now_dir,exp_dir1)+str(noparallel)
    yield get_info_str("step1:正在处理数据")
    yield get_info_str(cmd)
    p = Popen(cmd, shell=True)

@@ -356,7 +365,7 @@
    open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir1), "w")
    if(if_f0_3=="是"):
        yield get_info_str("step2a:正在提取音高")
-        cmd="python extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir1,np7,f0method8)
+        cmd=python_cmd + " extract_f0_print.py %s/logs/%s %s %s"%(now_dir,exp_dir1,np7,f0method8)
        yield get_info_str(cmd)
        p = Popen(cmd, shell=True,cwd=now_dir)
        p.wait()

@@ -368,7 +377,7 @@
    leng=len(gpus)
    ps=[]
    for idx,n_g in enumerate(gpus):
-        cmd="python extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1)
+        cmd=python_cmd + " extract_feature_print.py %s %s %s %s/logs/%s"%(leng,idx,n_g,now_dir,exp_dir1)
        yield get_info_str(cmd)
        p = Popen(cmd, shell=True, cwd=now_dir)  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
        ps.append(p)

@@ -394,7 +403,10 @@
        opt.append("%s/%s.wav|%s/%s.npy|%s"%(gt_wavs_dir.replace("\\","\\\\"),name,co256_dir.replace("\\","\\\\"),name,spk_id5))
    with open("%s/filelist.txt"%exp_dir,"w")as f:f.write("\n".join(opt))
    yield get_info_str("write filelist done")
-    cmd = "python train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
+    if gpus16:
+        cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,gpus16,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
+    else:
+        cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s -pg %s -pd %s -l %s -c %s" % (exp_dir1,sr2,1 if if_f0_3=="是"else 0,batch_size12,total_epoch11,save_epoch10,pretrained_G14,pretrained_D15,1 if if_save_latest13=="是"else 0,1 if if_cache_gpu17=="是"else 0)
    yield get_info_str(cmd)
    p = Popen(cmd, shell=True, cwd=now_dir)
    p.wait()

@@ -443,7 +455,7 @@ with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("模型推理"):
            with gr.Row():
-                sid0 = gr.Dropdown(label="推理音色", choices=names)
+                sid0 = gr.Dropdown(label="推理音色", choices=sorted(names))
                refresh_button = gr.Button("刷新音色列表", variant="primary")
                refresh_button.click(
                    fn=change_choices,

@@ -625,6 +637,7 @@
        with gr.TabItem("点击查看交流、问题反馈群号"):
            gr.Markdown(value="""xxxxx""")

    # app.launch(server_name="0.0.0.0",server_port=7860)
    # app.queue(concurrency_count=511, max_size=1022).launch(server_name="127.0.0.1",inbrowser=True,server_port=7861,quiet=True)
-    app.queue(concurrency_count=511, max_size=1022).launch(server_name="0.0.0.0",inbrowser=True,server_port=7865,quiet=True)
+    if iscolab:
+        app.queue(concurrency_count=511, max_size=1022).launch(share=True)
+    else:
+        app.queue(concurrency_count=511, max_size=1022).launch(server_name="0.0.0.0",inbrowser=True,server_port=listen_port,quiet=True)
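The click_train and train1key changes above build the training command differently depending on whether any GPU indices were supplied: the -g flag is simply dropped when gpus16 is empty. A small sketch of that command construction, using simplified placeholder arguments rather than the full option list from the diff:

from subprocess import Popen

def build_train_cmd(python_cmd: str, exp_dir: str, sr: str, gpus: str) -> str:
    """Mimic the conditional command building added in this commit (simplified flags)."""
    base = f"{python_cmd} train_nsf_sim_cache_sid_load_pretrain.py -e {exp_dir} -sr {sr}"
    if gpus:                      # e.g. "0" or "0-1": pass the GPU list through -g
        return f"{base} -g {gpus}"
    return base                   # no GPUs: omit -g so the trainer falls back to CPU

cmd = build_train_cmd("python3", "mi-test", "40k", "")
print(cmd)
# The web UI then runs it the same way the diff does:
# p = Popen(cmd, shell=True); p.wait()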
(5 changed binary files not shown)
my_utils.py (12 lines changed)

@@ -1,4 +1,5 @@
-import ffmpeg,numpy as np
+import ffmpeg
+import numpy as np
def load_audio(file,sr):
    try:
        # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
@@ -7,12 +8,9 @@ def load_audio(file,sr):
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
-            .run(cmd=["./ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
-    except ffmpeg.Error as e:
-        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
+    except Exception as e:
+        raise RuntimeError(f"Failed to load audio: {e}")

    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
-
-if __name__=='__main__' :
-    print(load_audio(r"C:\CloudMusic\宮野幸子,森下唯 - 月夜に謳う君 -LUNA-.mp3",16000).shape)
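With the change above, load_audio shells out to whatever ffmpeg is on PATH instead of a bundled ./ffmpeg binary, and decodes any ffmpeg-readable file to mono float32 PCM at the requested rate. A short usage sketch; the input path is a placeholder, and ffmpeg plus the ffmpeg-python package are assumed to be installed:

import numpy as np
from my_utils import load_audio  # the function shown in the diff above

# Decode an audio file to mono float32 samples at 16 kHz.
audio = load_audio("some_recording.wav", 16000)  # placeholder path
print(audio.shape, audio.dtype, float(np.abs(audio).max()))  # samples scaled to [-1, 1]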
pretrained/.gitignore (vendored, new file, 2 lines)

@@ -0,0 +1,2 @@
*
!.gitignore
slicer2.py

@@ -4,7 +4,6 @@ import numpy as np
# This function is obtained from librosa.
def get_rms(
    y,
    *,
    frame_length=2048,
    hop_length=512,
    pad_mode="constant",
-
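For reference, get_rms computes per-frame root-mean-square energy of a framed signal; the hunk above only touches whitespace around its signature. A tiny NumPy sketch of the same quantity, without the padding and stride tricks of the librosa-derived implementation:

import numpy as np

def frame_rms(y: np.ndarray, frame_length: int = 2048, hop_length: int = 512) -> np.ndarray:
    """RMS energy of each full frame of y (no padding, unlike the librosa-style version)."""
    n_frames = 1 + (len(y) - frame_length) // hop_length if len(y) >= frame_length else 0
    rms = np.empty(n_frames, dtype=np.float32)
    for i in range(n_frames):
        frame = y[i * hop_length : i * hop_length + frame_length]
        rms[i] = np.sqrt(np.mean(frame.astype(np.float64) ** 2))
    return rms

print(frame_rms(np.sin(np.linspace(0, 100, 16000)).astype(np.float32)))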
(5 changed binary files not shown)
train_nsf_sim_cache_sid_load_pretrain.py

@@ -34,9 +34,6 @@ global_step = 0


def main():
    """Assume Single Node Multi GPUs Training Only"""
-    assert torch.cuda.is_available(), "CPU training is not allowed."

    # n_gpus = torch.cuda.device_count()
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "5555"

@@ -65,7 +62,7 @@ def run(rank, n_gpus, hps):
        backend="gloo", init_method="env://", world_size=n_gpus, rank=rank
    )
    torch.manual_seed(hps.train.seed)
-    torch.cuda.set_device(rank)
+    if torch.cuda.is_available(): torch.cuda.set_device(rank)

    if (hps.if_f0 == 1):train_dataset = TextAudioLoaderMultiNSFsid(hps.data.training_files, hps.data)
    else:train_dataset = TextAudioLoader(hps.data.training_files, hps.data)

@@ -92,9 +89,13 @@ def run(rank, n_gpus, hps):
        persistent_workers=True,
        prefetch_factor=8,
    )
-    if(hps.if_f0==1):net_g = SynthesizerTrnMs256NSFsid(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run,sr=hps.sample_rate).cuda(rank)
-    else:net_g = SynthesizerTrnMs256NSFsid_nono(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run).cuda(rank)
-    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank)
+    if(hps.if_f0==1):
+        net_g = SynthesizerTrnMs256NSFsid(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run,sr=hps.sample_rate)
+    else:
+        net_g = SynthesizerTrnMs256NSFsid_nono(hps.data.filter_length // 2 + 1,hps.train.segment_size // hps.data.hop_length,**hps.model,is_half=hps.train.fp16_run)
+    if torch.cuda.is_available(): net_g = net_g.cuda(rank)
+    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm)
+    if torch.cuda.is_available(): net_d = net_d.cuda(rank)
    optim_g = torch.optim.AdamW(
        net_g.parameters(),
        hps.train.learning_rate,

@@ -109,8 +110,12 @@ def run(rank, n_gpus, hps):
    )
    # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
    # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
-    net_g = DDP(net_g, device_ids=[rank])
-    net_d = DDP(net_d, device_ids=[rank])
+    if torch.cuda.is_available():
+        net_g = DDP(net_g, device_ids=[rank])
+        net_d = DDP(net_d, device_ids=[rank])
+    else:
+        net_g = DDP(net_g)
+        net_d = DDP(net_d)

    try:  # auto-resume if a checkpoint can be loaded
        _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d)  # loading D usually works

@@ -190,11 +195,12 @@ def train_and_evaluate(
    for batch_idx, info in enumerate(train_loader):
        if (hps.if_f0 == 1):phone,phone_lengths,pitch,pitchf,spec,spec_lengths,wave,wave_lengths,sid=info
        else:phone,phone_lengths,spec,spec_lengths,wave,wave_lengths,sid=info
-        phone, phone_lengths = phone.cuda(rank, non_blocking=True),phone_lengths.cuda(rank, non_blocking=True )
-        if (hps.if_f0 == 1):pitch,pitchf = pitch.cuda(rank, non_blocking=True),pitchf.cuda(rank, non_blocking=True)
-        sid = sid.cuda(rank, non_blocking=True)
-        spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True)
-        wave, wave_lengths = wave.cuda(rank, non_blocking=True), wave_lengths.cuda(rank, non_blocking=True)
+        if torch.cuda.is_available():
+            phone, phone_lengths = phone.cuda(rank, non_blocking=True), phone_lengths.cuda(rank, non_blocking=True )
+            if (hps.if_f0 == 1):pitch,pitchf = pitch.cuda(rank, non_blocking=True),pitchf.cuda(rank, non_blocking=True)
+            sid = sid.cuda(rank, non_blocking=True)
+            spec, spec_lengths = spec.cuda(rank, non_blocking=True), spec_lengths.cuda(rank, non_blocking=True)
+            wave, wave_lengths = wave.cuda(rank, non_blocking=True), wave_lengths.cuda(rank, non_blocking=True)
        if(hps.if_cache_data_in_gpu==True):
            if (hps.if_f0 == 1):cache.append((batch_idx, (phone,phone_lengths,pitch,pitchf,spec,spec_lengths,wave,wave_lengths ,sid)))
            else:cache.append((batch_idx, (phone,phone_lengths,spec,spec_lengths,wave,wave_lengths ,sid)))
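The training-script changes above make every .cuda(rank) call conditional so the same code path also runs on CPU with the gloo backend already used for init_process_group. A condensed sketch of that pattern, assuming the distributed process group has been initialized beforehand; the helper names are illustrative:

import torch
from torch.nn.parallel import DistributedDataParallel as DDP

def to_device_and_wrap(model: torch.nn.Module, rank: int) -> torch.nn.Module:
    """Move the model to GPU `rank` when CUDA exists, then wrap it in DDP either way."""
    if torch.cuda.is_available():
        model = model.cuda(rank)
        return DDP(model, device_ids=[rank])   # one GPU per process
    return DDP(model)                          # CPU/gloo: DDP without device_ids

def move_batch(batch: dict, rank: int) -> dict:
    """Only push tensors to the GPU when one is available; otherwise keep them on CPU."""
    if not torch.cuda.is_available():
        return batch
    return {k: v.cuda(rank, non_blocking=True) for k, v in batch.items()}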
trainset_preprocess_pipeline_print.py

@@ -1,4 +1,4 @@
-import sys,os,pdb,multiprocessing
+import sys,os,multiprocessing
now_dir=os.getcwd()
sys.path.append(now_dir)

@@ -6,20 +6,15 @@ inp_root = sys.argv[1]
sr = int(sys.argv[2])
n_p = int(sys.argv[3])
exp_dir = sys.argv[4]
-import numpy as np,ffmpeg,os,traceback
+noparallel = sys.argv[5] == "True"
+import numpy as np,os,traceback
from slicer2 import Slicer
-from joblib import Parallel, delayed
-import librosa,traceback
from scipy.io import wavfile
import multiprocessing
from my_utils import load_audio
from time import sleep

-f = open("%s/preprocess.log"%exp_dir, "a+")
-def printt(strr):
-    print(strr)
-    f.write("%s\n" % strr)
-    f.flush()
+mutex = multiprocessing.Lock()

class PreProcess():
    def __init__(self,sr,exp_dir):

@@ -40,10 +35,18 @@ class PreProcess():
        self.exp_dir=exp_dir
        self.gt_wavs_dir="%s/0_gt_wavs"%exp_dir
        self.wavs16k_dir="%s/1_16k_wavs"%exp_dir
+        self.f = open("%s/preprocess.log"%exp_dir, "a+")
        os.makedirs(self.exp_dir,exist_ok=True)
        os.makedirs(self.gt_wavs_dir,exist_ok=True)
        os.makedirs(self.wavs16k_dir,exist_ok=True)

+    def print(self, strr):
+        mutex.acquire()
+        print(strr)
+        self.f.write("%s\n" % strr)
+        self.f.flush()
+        mutex.release()
+
    def norm_write(self,tmp_audio,idx0,idx1):
        tmp_audio = (tmp_audio / np.abs(tmp_audio).max() * (self.max * self.alpha)) + (1 - self.alpha) * tmp_audio
        wavfile.write("%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1), self.sr, (tmp_audio*32768).astype(np.int16))

@@ -67,9 +70,9 @@ class PreProcess():
                    tmp_audio = audio[start:]
                    break
                self.norm_write(tmp_audio, idx0, idx1)
-            printt("%s->Suc."%path)
+            self.print("%s->Suc."%path)
        except:
-            printt("%s->%s"%(path,traceback.format_exc()))
+            self.print("%s->%s"%(path,traceback.format_exc()))

    def pipeline_mp(self,infos):
        for path, idx0 in infos:

@@ -78,27 +81,24 @@ class PreProcess():
    def pipeline_mp_inp_dir(self,inp_root,n_p):
        try:
            infos = [("%s/%s" % (inp_root, name), idx) for idx, name in enumerate(sorted(list(os.listdir(inp_root))))]
-            ps=[]
-            for i in range(n_p):
-                p=multiprocessing.Process(target=self.pipeline_mp,args=(infos[i::n_p],))
-                p.start()
-                ps.append(p)
-            for p in ps:p.join()
+            if noparallel:
+                for i in range(n_p): self.pipeline_mp(infos[i::n_p])
+            else:
+                ps=[]
+                for i in range(n_p):
+                    p=multiprocessing.Process(target=self.pipeline_mp,args=(infos[i::n_p],))
+                    p.start()
+                    ps.append(p)
+                for p in ps:p.join()
        except:
-            printt("Fail. %s"%traceback.format_exc())
+            self.print("Fail. %s"%traceback.format_exc())

+def preprocess_trainset(inp_root, sr, n_p, exp_dir):
+    pp=PreProcess(sr,exp_dir)
+    pp.print("start preprocess")
+    pp.print(sys.argv)
+    pp.pipeline_mp_inp_dir(inp_root,n_p)
+    pp.print("end preprocess")
+
if __name__=='__main__':
-    # f = open("logs/log_preprocess.log", "w")
-    printt(sys.argv)
-    ######################################################
-    # inp_root=r"E:\语音音频+标注\米津玄师\src"
-    # inp_root=r"E:\codes\py39\vits_vc_gpu_train\todo-songs"
-    # sr=40000
-    # n_p = 6
-    # exp_dir=r"E:\codes\py39\dataset\mi-test"
-
-    ######################################################
-    printt("start preprocess")
-    pp=PreProcess(sr,exp_dir)
-    pp.pipeline_mp_inp_dir(inp_root,n_p)
-    printt("end preprocess")
+    preprocess_trainset(inp_root, sr, n_p, exp_dir)
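The preprocessing script now takes an extra noparallel argument (passed as the literal string "True" or "False" by infer-web.py) and either processes its work chunks serially or fans them out with multiprocessing. A standalone sketch of that dispatch, with a generic worker in place of the PreProcess class:

import multiprocessing

def work(chunk):
    # Stand-in for PreProcess.pipeline_mp: handle one slice of the file list.
    for item in chunk:
        print("processing", item)

def run_chunks(items, n_p: int, noparallel: bool) -> None:
    chunks = [items[i::n_p] for i in range(n_p)]   # same round-robin split as the diff
    if noparallel:
        for chunk in chunks:                       # serial fallback
            work(chunk)
    else:
        ps = [multiprocessing.Process(target=work, args=(chunk,)) for chunk in chunks]
        for p in ps: p.start()
        for p in ps: p.join()

if __name__ == "__main__":
    run_chunks([f"file_{i}.wav" for i in range(8)], n_p=4, noparallel=True)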
(5 changed binary files not shown)
uvr5_weights/.gitignore (vendored, new file, 2 lines)

@@ -0,0 +1,2 @@
*
!.gitignore
(unnamed file, 1 line removed)

@@ -1 +0,0 @@
-
weights/.gitignore (vendored, new file, 2 lines)

@@ -0,0 +1,2 @@
*
!.gitignore
LICENSE

@@ -1,6 +1,7 @@
MIT License

Copyright (c) 2023 liujing04
+Copyright (c) 2023 源文雨

This software and its related code are open-sourced under the MIT license. The author has no control over the software; users of the software and those who distribute the audio it exports bear full responsibility for the consequences.
If you do not accept these terms, you may not use or reference any code or files in this package.