From def42110c089cb739be7a27334c39c7011085a8e Mon Sep 17 00:00:00 2001 From: Rice Cake Date: Mon, 10 Apr 2023 16:45:47 +0800 Subject: [PATCH 01/11] Update README_en.md --- README_en.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README_en.md b/README_en.md index 6fe55e8..2bc15c1 100644 --- a/README_en.md +++ b/README_en.md @@ -22,6 +22,7 @@ This repository has the following features: + Supporting model fusion to change timbres; + Easy-to-use Webui interface; + Use the UVR5 model to quickly separate vocals and instruments. ++ The dataset for the pre-training model uses nearly 50 hours of high quality VCTK open source, and high quality licensed song datasets will be added one after another for your use, without worrying about copyright infringement. ## Preparing the environment We recommend you install the dependencies through poetry. From d7c0cda07ef8d80b29e1159fa40e37e95256cbc4 Mon Sep 17 00:00:00 2001 From: Rice Cake Date: Mon, 10 Apr 2023 16:48:10 +0800 Subject: [PATCH 02/11] Update README_en.md --- README_en.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README_en.md b/README_en.md index 2bc15c1..aec06ac 100644 --- a/README_en.md +++ b/README_en.md @@ -32,6 +32,10 @@ The following commands need to be executed in the environment of Python version # Reference: https://pytorch.org/get-started/locally/ pip install torch torchvision torchaudio +#For Win + 30-series Nvidia cards, you need to specify the cuda version corresponding to pytorch according to the experience of https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/issues/21 + +pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 + # Install the Poetry dependency management tool, skip if installed # Reference: https://python-poetry.org/docs/#installation curl -sSL https://install.python-poetry.org | python3 - From 3ccae97cc7a3b142af5c74f25e9b029201068294 Mon Sep 17 00:00:00 2001 From: Rice Cake Date: Mon, 10 Apr 2023 16:48:49 +0800 Subject: [PATCH 03/11] Update README_en.md --- README_en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_en.md b/README_en.md index aec06ac..bb7c4ec 100644 --- a/README_en.md +++ b/README_en.md @@ -32,7 +32,7 @@ The following commands need to be executed in the environment of Python version # Reference: https://pytorch.org/get-started/locally/ pip install torch torchvision torchaudio -#For Win + 30-series Nvidia cards, you need to specify the cuda version corresponding to pytorch according to the experience of https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/issues/21 +#For Windows + 30-series Nvidia cards, you need to specify the cuda version corresponding to pytorch according to the experience of https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/issues/21 pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 From ff1a711cad23140298909076b1cbc95340960f7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:28:39 +0800 Subject: [PATCH 04/11] =?UTF-8?q?fix:=20MacOS=20=E7=BA=AF=20CPU=20?= =?UTF-8?q?=E6=8E=A8=E7=90=86=E6=97=B6=20Segmentation=20fault:=2011=20see:?= =?UTF-8?q?=20facebookresearch/faiss#2317=20facebookresearch#2410?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Retrieval_based_Voice_Conversion_WebUI.ipynb | 31 ++++++++++++++------ infer-web.py | 8 ++--- infer/infer-pm-index256.py | 8 ++--- 
requirements.txt | 2 +- vc_infer_pipeline.py | 11 +++++-- 5 files changed, 39 insertions(+), 21 deletions(-) diff --git a/Retrieval_based_Voice_Conversion_WebUI.ipynb b/Retrieval_based_Voice_Conversion_WebUI.ipynb index eb549d8..a47f3b4 100644 --- a/Retrieval_based_Voice_Conversion_WebUI.ipynb +++ b/Retrieval_based_Voice_Conversion_WebUI.ipynb @@ -129,10 +129,10 @@ "#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n", "\n", "#@markdown 数据集位置\n", - "DATASET = \"/content/drive/MyDrive/dataset/lulucall_48k.zip\" #@param {type:\"string\"}\n", + "DATASET = \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" #@param {type:\"string\"}\n", "\n", "!mkdir -p /content/dataset\n", - "!unzip -d /content/dataset {DATASET}" + "!unzip -d /content/dataset -B {DATASET}" ], "metadata": { "id": "Mwk7Q0Loqzjx" @@ -140,13 +140,26 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "code", + "source": [ + "#@title 重命名数据集中的重名文件\n", + "!ls -a /content/dataset/\n", + "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*" + ], + "metadata": { + "id": "PDlFxWHWEynD" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ "#@title 启动web\n", "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", - "%load_ext tensorboard\n", - "%tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n", + "# %load_ext tensorboard\n", + "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n", "!python3 infer-web.py --colab --pycmd python3" ], "metadata": { @@ -164,7 +177,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 模型epoch\n", - "MODELEPOCH = 7500 #@param {type:\"integer\"}\n", + "MODELEPOCH = 9600 #@param {type:\"integer\"}\n", "\n", "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n", "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n", @@ -188,7 +201,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 模型epoch\n", - "MODELEPOCH = 6000 #@param {type:\"integer\"}\n", + "MODELEPOCH = 7500 #@param {type:\"integer\"}\n", "\n", "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", "\n", @@ -241,7 +254,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 停止的epoch\n", - "MODELEPOCH = 2500 #@param {type:\"integer\"}\n", + "MODELEPOCH = 3200 #@param {type:\"integer\"}\n", "#@markdown 保存epoch间隔\n", "EPOCHSAVE = 100 #@param {type:\"integer\"}\n", "#@markdown 采样率\n", @@ -262,7 +275,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 选中模型epoch\n", - "MODELEPOCH = 7700 #@param {type:\"integer\"}\n", + "MODELEPOCH = 9600 #@param {type:\"integer\"}\n", "\n", "!echo \"备份选中的模型。。。\"\n", "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", @@ -292,7 +305,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 选中模型epoch\n", - "MODELEPOCH = 7700 #@param {type:\"integer\"}\n", + "MODELEPOCH = 9600 #@param {type:\"integer\"}\n", "\n", "!echo \"备份选中的模型。。。\"\n", "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", diff --git a/infer-web.py b/infer-web.py index c838a2a..3067658 100644 --- a/infer-web.py +++ 
b/infer-web.py @@ -9,7 +9,7 @@ import faiss ncpu=cpu_count() ngpu=torch.cuda.device_count() gpu_infos=[] -if(torch.cuda.is_available()==False or ngpu==0):if_gpu_ok=False +if((not torch.cuda.is_available()) or ngpu==0):if_gpu_ok=False else: if_gpu_ok = False for i in range(ngpu): @@ -140,7 +140,7 @@ def uvr(model_name,inp_root,save_root_vocal,paths,save_root_ins): except: traceback.print_exc() print("clean_empty_cache") - torch.cuda.empty_cache() + if torch.cuda.is_available(): torch.cuda.empty_cache() yield "\n".join(infos) #一个选项卡全局只能有一个音色 @@ -152,7 +152,7 @@ def get_vc(sid): print("clean_empty_cache") del net_g, n_spk, vc, hubert_model,tgt_sr#,cpt hubert_model = net_g=n_spk=vc=hubert_model=tgt_sr=None - torch.cuda.empty_cache() + if torch.cuda.is_available(): torch.cuda.empty_cache() ###楼下不这么折腾清理不干净 if_f0 = cpt.get("f0", 1) if (if_f0 == 1): @@ -160,7 +160,7 @@ def get_vc(sid): else: net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) del net_g,cpt - torch.cuda.empty_cache() + if torch.cuda.is_available(): torch.cuda.empty_cache() cpt=None return {"visible": False, "__type__": "update"} person = "%s/%s" % (weight_root, sid) diff --git a/infer/infer-pm-index256.py b/infer/infer-pm-index256.py index dd94834..add0245 100644 --- a/infer/infer-pm-index256.py +++ b/infer/infer-pm-index256.py @@ -104,7 +104,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):## "padding_mask": padding_mask.to(device), "output_layer": 9, # layer 9 } - torch.cuda.synchronize() + if torch.cuda.is_available(): torch.cuda.synchronize() t0=ttime() with torch.no_grad(): logits = model.extract_features(**inputs) @@ -116,13 +116,13 @@ for idx,name in enumerate(["冬之花clip1.wav",]):## feats = torch.from_numpy(big_npy[I.squeeze()].astype("float16")).unsqueeze(0).to(device) feats=F.interpolate(feats.permute(0,2,1),scale_factor=2).permute(0,2,1) - torch.cuda.synchronize() + if torch.cuda.is_available(): torch.cuda.synchronize() t1=ttime() # p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存 p_len = min(feats.shape[1],10000)# pitch, pitchf = get_f0(audio, p_len,f0_up_key) p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存 - torch.cuda.synchronize() + if torch.cuda.is_available(): torch.cuda.synchronize() t2=ttime() feats = feats[:,:p_len, :] pitch = pitch[:p_len] @@ -133,7 +133,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):## pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device) with torch.no_grad(): audio = net_g.infer(feats, p_len,pitch,pitchf,sid)[0][0, 0].data.cpu().float().numpy()#nsf - torch.cuda.synchronize() + if torch.cuda.is_available(): torch.cuda.synchronize() t3=ttime() ta0+=(t1-t0) ta1+=(t2-t1) diff --git a/requirements.txt b/requirements.txt index e00ebda..82c73f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ scipy==1.9.3 librosa==0.9.2 llvmlite==0.39.0 fairseq==0.12.2 -faiss-cpu==1.7.2 +faiss-cpu==1.7.0 gradio Cython future>=0.18.3 diff --git a/vc_infer_pipeline.py b/vc_infer_pipeline.py index 30b03e3..c15ce4e 100644 --- a/vc_infer_pipeline.py +++ b/vc_infer_pipeline.py @@ -72,6 +72,7 @@ class VC(object): "output_layer": 9, # layer 9 } t0 = ttime() + print("vc npy start time:", t0) with torch.no_grad(): logits = model.extract_features(**inputs) feats = model.final_proj(logits[0]) @@ -79,13 +80,14 @@ class VC(object): if(isinstance(index,type(None))==False and isinstance(big_npy,type(None))==False and index_rate!=0): npy = feats[0].cpu().numpy() if(self.is_half==True):npy=npy.astype("float32") - D, I = index.search(npy, 1) + _, I = index.search(npy, 1) 
npy=big_npy[I.squeeze()] if(self.is_half==True):npy=npy.astype("float16") feats = torch.from_numpy(npy).unsqueeze(0).to(self.device)*index_rate + (1-index_rate)*feats feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) t1 = ttime() + print("vc infer start time:", t1) p_len = audio0.shape[0]//self.window if(feats.shape[1] Date: Mon, 10 Apr 2023 18:34:10 +0800 Subject: [PATCH 05/11] =?UTF-8?q?=E4=BC=98=E5=8C=96print?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- infer-web.py | 2 +- vc_infer_pipeline.py | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/infer-web.py b/infer-web.py index 3067658..d2cd506 100644 --- a/infer-web.py +++ b/infer-web.py @@ -79,7 +79,7 @@ def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,file_big_np if(hubert_model==None):load_hubert() if_f0 = cpt.get("f0", 1) audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,file_big_npy,index_rate,if_f0,f0_file=f0_file) - print(times) + print("npy: ", times[0], "s, f0:", times[1], "s, infer: ", times[2], "s", sep='') return "Success", (tgt_sr, audio_opt) except: info=traceback.format_exc() diff --git a/vc_infer_pipeline.py b/vc_infer_pipeline.py index c15ce4e..e05ef4c 100644 --- a/vc_infer_pipeline.py +++ b/vc_infer_pipeline.py @@ -72,7 +72,6 @@ class VC(object): "output_layer": 9, # layer 9 } t0 = ttime() - print("vc npy start time:", t0) with torch.no_grad(): logits = model.extract_features(**inputs) feats = model.final_proj(logits[0]) @@ -87,7 +86,6 @@ class VC(object): feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) t1 = ttime() - print("vc infer start time:", t1) p_len = audio0.shape[0]//self.window if(feats.shape[1] Date: Mon, 10 Apr 2023 18:46:58 +0800 Subject: [PATCH 06/11] edit README --- README.md | 4 ++++ README_en.md | 3 +++ requirements.txt | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b93d9dc..2de4373 100644 --- a/README.md +++ b/README.md @@ -47,9 +47,13 @@ poetry install ``` 你也可以通过pip来安装依赖: + +**注意**: `MacOS`下`faiss 1.7.2`版本会导致抛出段错误,请将`requirements.txt`的对应条目改为`faiss-cpu==1.7.0` + ```bash pip install -r requirements.txt ``` + ## 其他预模型准备 RVC需要其他的一些预模型来推理和训练。 diff --git a/README_en.md b/README_en.md index 6fe55e8..8e14e6b 100644 --- a/README_en.md +++ b/README_en.md @@ -40,9 +40,12 @@ poetry install ``` You can also use pip to install the dependencies +**Notice**: `faiss 1.7.2` will raise Segmentation Fault: 11 under `MacOS`, please change corresponding line in `requirements.txt` to `faiss-cpu==1.7.0` + ```bash pip install -r requirements.txt ``` + ## Preparation of other Pre-models RVC requires other pre-models to infer and train. 
diff --git a/requirements.txt b/requirements.txt index 82c73f4..e00ebda 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ scipy==1.9.3 librosa==0.9.2 llvmlite==0.39.0 fairseq==0.12.2 -faiss-cpu==1.7.0 +faiss-cpu==1.7.2 gradio Cython future>=0.18.3 From 1c1ee8ebc15b260c9d560003c7d1fedd75059549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:51:15 +0800 Subject: [PATCH 07/11] =?UTF-8?q?=E7=BE=8E=E5=8C=96=E7=95=8C=E9=9D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2de4373..3c6b4a9 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,28 @@ -# Retrieval-based-Voice-Conversion-WebUI +
+ +

Retrieval-based-Voice-Conversion-WebUI

+一个基于VITS的简单易用的语音转换(变声器)框架。

[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI) +
+ [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) [![Licence](https://img.shields.io/github/license/liujing04/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/%E4%BD%BF%E7%94%A8%E9%9C%80%E9%81%B5%E5%AE%88%E7%9A%84%E5%8D%8F%E8%AE%AE-LICENSE.txt) [![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-blue.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/) -### 使用了RVC的实时语音转换 : [w-okada/voice-changer](https://github.com/w-okada/voice-changer) ------- +
-一个基于VITS的简单易用的语音转换(变声器)框架。 +------ [**更新日志**](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Changelog_CN.md) [**English**](./README_en.md) | [**中文简体**](./README.md) > 点此查看我们的[演示视频](https://www.bilibili.com/video/BV1pm4y1z7Gm/) ! + +> 使用了RVC的实时语音转换: [w-okada/voice-changer](https://github.com/w-okada/voice-changer) + ## 简介 本仓库具有以下特点: + 使用top1特征模型检索来杜绝音色泄漏; From 1ac11aee46176a8800d41eeba8d9769f13f50c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:52:30 +0800 Subject: [PATCH 08/11] add counter --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 3c6b4a9..5009de3 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@
+
+ [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) [![Licence](https://img.shields.io/github/license/liujing04/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/%E4%BD%BF%E7%94%A8%E9%9C%80%E9%81%B5%E5%AE%88%E7%9A%84%E5%8D%8F%E8%AE%AE-LICENSE.txt) [![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-blue.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/) From 342cb735bbdf77db59e4fe7ea8ac2a3bce4c8c71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:53:25 +0800 Subject: [PATCH 09/11] edit README --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 5009de3..a2bc7ec 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,6 @@ [![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI) -
-
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) From 45d4daba2ce19362b429766af954e107bc1529c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:54:02 +0800 Subject: [PATCH 10/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2bc7ec..7e629d2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@

Retrieval-based-Voice-Conversion-WebUI

-一个基于VITS的简单易用的语音转换(变声器)框架。

+一个基于VITS的简单易用的语音转换(变声器)框架

[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI) From 1ad3f6ef1224c49b70371d1a963d190c09f54f7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:55:04 +0800 Subject: [PATCH 11/11] remove old README --- README_v0.md | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 README_v0.md diff --git a/README_v0.md b/README_v0.md deleted file mode 100644 index 237679d..0000000 --- a/README_v0.md +++ /dev/null @@ -1,32 +0,0 @@ -# Retrieval-based-Voice-Conversion-WebUI - -[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) - -缺失的2个文件夹和2个文件: - -hubert_base.pt - -ffmpeg(自己确保ffmpeg命令能执行就行) - -pretrained文件夹 - -uvr5_weights文件夹 - -文件太大github传不动,去huggingface上下https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main - -当然你也可以直接看看RVC-beta.7z这个文件→_→ - -按照requirements.txt用pip装好环境,python infer-web.py就能用了 - -根据经验,librosa numpy和numba三个包最好写死版本否则容易有坑,其他的包版本不太重要 - -宣传视频:https://www.bilibili.com/video/BV1pm4y1z7Gm/ - -教程见小白简易教程.doc - -We will develop an English version windows WebUI APP in 2 weeks. - - -### Realtime Voice Conversion Software using RVC - -https://github.com/w-okada/voice-changer
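
Taken together, the PATCH 04 edits to infer-web.py and infer/infer-pm-index256.py apply one pattern: every CUDA-only call such as torch.cuda.synchronize() or torch.cuda.empty_cache() is wrapped in a torch.cuda.is_available() check, so pure-CPU inference (the macOS case named in the commit message, which also motivates the faiss-cpu==1.7.0 pin documented in PATCH 06) never enters the CUDA runtime. The following is a minimal sketch of that guard; the helper names are hypothetical and not part of the repository.

```python
import torch
from time import time as ttime


def cuda_sync() -> None:
    # Hypothetical helper: only synchronize when a CUDA device exists, so
    # CPU-only runs never call into the CUDA runtime. This mirrors the guard
    # PATCH 04 places before each timing point in infer/infer-pm-index256.py.
    if torch.cuda.is_available():
        torch.cuda.synchronize()


def release_gpu_memory() -> None:
    # Same guard around cache cleanup, mirroring the infer-web.py changes.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


# Usage, following the structure of infer/infer-pm-index256.py: bracket each
# stage with cuda_sync() so the ttime() deltas measure completed GPU work
# rather than kernels that are still queued.
cuda_sync()
t0 = ttime()
# ... feature extraction / synthesis would run here ...
cuda_sync()
t1 = ttime()
print("stage took", t1 - t0, "s")
release_gpu_memory()
```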