From ff1a711cad23140298909076b1cbc95340960f7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:28:39 +0800 Subject: [PATCH 1/8] =?UTF-8?q?fix:=20MacOS=20=E7=BA=AF=20CPU=20=E6=8E=A8?= =?UTF-8?q?=E7=90=86=E6=97=B6=20Segmentation=20fault:=2011=20see:=20facebo?= =?UTF-8?q?okresearch/faiss#2317=20facebookresearch#2410?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Retrieval_based_Voice_Conversion_WebUI.ipynb | 31 ++++++++++++++------ infer-web.py | 8 ++--- infer/infer-pm-index256.py | 8 ++--- requirements.txt | 2 +- vc_infer_pipeline.py | 11 +++++-- 5 files changed, 39 insertions(+), 21 deletions(-) diff --git a/Retrieval_based_Voice_Conversion_WebUI.ipynb b/Retrieval_based_Voice_Conversion_WebUI.ipynb index eb549d8..a47f3b4 100644 --- a/Retrieval_based_Voice_Conversion_WebUI.ipynb +++ b/Retrieval_based_Voice_Conversion_WebUI.ipynb @@ -129,10 +129,10 @@ "#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n", "\n", "#@markdown 数据集位置\n", - "DATASET = \"/content/drive/MyDrive/dataset/lulucall_48k.zip\" #@param {type:\"string\"}\n", + "DATASET = \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" #@param {type:\"string\"}\n", "\n", "!mkdir -p /content/dataset\n", - "!unzip -d /content/dataset {DATASET}" + "!unzip -d /content/dataset -B {DATASET}" ], "metadata": { "id": "Mwk7Q0Loqzjx" @@ -140,13 +140,26 @@ "execution_count": null, "outputs": [] }, + { + "cell_type": "code", + "source": [ + "#@title 重命名数据集中的重名文件\n", + "!ls -a /content/dataset/\n", + "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*" + ], + "metadata": { + "id": "PDlFxWHWEynD" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "source": [ "#@title 启动web\n", "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", - "%load_ext tensorboard\n", - "%tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n", + "# %load_ext tensorboard\n", + "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n", "!python3 infer-web.py --colab --pycmd python3" ], "metadata": { @@ -164,7 +177,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 模型epoch\n", - "MODELEPOCH = 7500 #@param {type:\"integer\"}\n", + "MODELEPOCH = 9600 #@param {type:\"integer\"}\n", "\n", "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n", "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n", @@ -188,7 +201,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 模型epoch\n", - "MODELEPOCH = 6000 #@param {type:\"integer\"}\n", + "MODELEPOCH = 7500 #@param {type:\"integer\"}\n", "\n", "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", "\n", @@ -241,7 +254,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 停止的epoch\n", - "MODELEPOCH = 2500 #@param {type:\"integer\"}\n", + "MODELEPOCH = 3200 #@param {type:\"integer\"}\n", "#@markdown 保存epoch间隔\n", "EPOCHSAVE = 100 #@param {type:\"integer\"}\n", "#@markdown 采样率\n", @@ -262,7 +275,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 选中模型epoch\n", - "MODELEPOCH = 7700 #@param {type:\"integer\"}\n", + "MODELEPOCH = 9600 #@param {type:\"integer\"}\n", "\n", "!echo \"备份选中的模型。。。\"\n", "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", @@ -292,7 +305,7 @@ "#@markdown 模型名\n", "MODELNAME = \"lulu\" #@param {type:\"string\"}\n", "#@markdown 选中模型epoch\n", - "MODELEPOCH = 7700 #@param {type:\"integer\"}\n", + "MODELEPOCH = 9600 #@param {type:\"integer\"}\n", "\n", "!echo \"备份选中的模型。。。\"\n", "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", diff --git a/infer-web.py b/infer-web.py index c838a2a..3067658 100644 --- a/infer-web.py +++ b/infer-web.py @@ -9,7 +9,7 @@ import faiss ncpu=cpu_count() ngpu=torch.cuda.device_count() gpu_infos=[] -if(torch.cuda.is_available()==False or ngpu==0):if_gpu_ok=False +if((not torch.cuda.is_available()) or ngpu==0):if_gpu_ok=False else: if_gpu_ok = False for i in range(ngpu): @@ -140,7 +140,7 @@ def uvr(model_name,inp_root,save_root_vocal,paths,save_root_ins): except: traceback.print_exc() print("clean_empty_cache") - torch.cuda.empty_cache() + if torch.cuda.is_available(): torch.cuda.empty_cache() yield "\n".join(infos) #一个选项卡全局只能有一个音色 @@ -152,7 +152,7 @@ def get_vc(sid): print("clean_empty_cache") del net_g, n_spk, vc, hubert_model,tgt_sr#,cpt hubert_model = net_g=n_spk=vc=hubert_model=tgt_sr=None - torch.cuda.empty_cache() + if torch.cuda.is_available(): torch.cuda.empty_cache() ###楼下不这么折腾清理不干净 if_f0 = cpt.get("f0", 1) if (if_f0 == 1): @@ -160,7 +160,7 @@ def get_vc(sid): else: net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) del net_g,cpt - torch.cuda.empty_cache() + if torch.cuda.is_available(): torch.cuda.empty_cache() cpt=None return {"visible": False, "__type__": "update"} person = "%s/%s" % (weight_root, sid) diff --git a/infer/infer-pm-index256.py b/infer/infer-pm-index256.py index dd94834..add0245 100644 --- a/infer/infer-pm-index256.py +++ b/infer/infer-pm-index256.py @@ -104,7 +104,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):## "padding_mask": padding_mask.to(device), "output_layer": 9, # layer 9 } - torch.cuda.synchronize() + if torch.cuda.is_available(): torch.cuda.synchronize() t0=ttime() with torch.no_grad(): logits = model.extract_features(**inputs) @@ -116,13 +116,13 @@ for idx,name in enumerate(["冬之花clip1.wav",]):## feats = torch.from_numpy(big_npy[I.squeeze()].astype("float16")).unsqueeze(0).to(device) feats=F.interpolate(feats.permute(0,2,1),scale_factor=2).permute(0,2,1) - torch.cuda.synchronize() + if torch.cuda.is_available(): torch.cuda.synchronize() t1=ttime() # p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存 p_len = min(feats.shape[1],10000)# pitch, pitchf = get_f0(audio, p_len,f0_up_key) p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存 - torch.cuda.synchronize() + if torch.cuda.is_available(): torch.cuda.synchronize() t2=ttime() feats = feats[:,:p_len, :] pitch = pitch[:p_len] @@ -133,7 +133,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):## pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device) with torch.no_grad(): audio = net_g.infer(feats, p_len,pitch,pitchf,sid)[0][0, 0].data.cpu().float().numpy()#nsf - torch.cuda.synchronize() + if torch.cuda.is_available(): torch.cuda.synchronize() t3=ttime() ta0+=(t1-t0) ta1+=(t2-t1) diff --git a/requirements.txt b/requirements.txt index e00ebda..82c73f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ scipy==1.9.3 librosa==0.9.2 llvmlite==0.39.0 fairseq==0.12.2 -faiss-cpu==1.7.2 +faiss-cpu==1.7.0 gradio Cython future>=0.18.3 diff --git a/vc_infer_pipeline.py b/vc_infer_pipeline.py index 30b03e3..c15ce4e 100644 --- a/vc_infer_pipeline.py +++ b/vc_infer_pipeline.py @@ -72,6 +72,7 @@ class VC(object): "output_layer": 9, # layer 9 } t0 = ttime() + print("vc npy start time:", t0) with torch.no_grad(): logits = model.extract_features(**inputs) feats = model.final_proj(logits[0]) @@ -79,13 +80,14 @@ class VC(object): if(isinstance(index,type(None))==False and isinstance(big_npy,type(None))==False and index_rate!=0): npy = feats[0].cpu().numpy() if(self.is_half==True):npy=npy.astype("float32") - D, I = index.search(npy, 1) + _, I = index.search(npy, 1) npy=big_npy[I.squeeze()] if(self.is_half==True):npy=npy.astype("float16") feats = torch.from_numpy(npy).unsqueeze(0).to(self.device)*index_rate + (1-index_rate)*feats feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) t1 = ttime() + print("vc infer start time:", t1) p_len = audio0.shape[0]//self.window if(feats.shape[1] Date: Mon, 10 Apr 2023 18:34:10 +0800 Subject: [PATCH 2/8] =?UTF-8?q?=E4=BC=98=E5=8C=96print?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- infer-web.py | 2 +- vc_infer_pipeline.py | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/infer-web.py b/infer-web.py index 3067658..d2cd506 100644 --- a/infer-web.py +++ b/infer-web.py @@ -79,7 +79,7 @@ def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,file_big_np if(hubert_model==None):load_hubert() if_f0 = cpt.get("f0", 1) audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,file_big_npy,index_rate,if_f0,f0_file=f0_file) - print(times) + print("npy: ", times[0], "s, f0:", times[1], "s, infer: ", times[2], "s", sep='') return "Success", (tgt_sr, audio_opt) except: info=traceback.format_exc() diff --git a/vc_infer_pipeline.py b/vc_infer_pipeline.py index c15ce4e..e05ef4c 100644 --- a/vc_infer_pipeline.py +++ b/vc_infer_pipeline.py @@ -72,7 +72,6 @@ class VC(object): "output_layer": 9, # layer 9 } t0 = ttime() - print("vc npy start time:", t0) with torch.no_grad(): logits = model.extract_features(**inputs) feats = model.final_proj(logits[0]) @@ -87,7 +86,6 @@ class VC(object): feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) t1 = ttime() - print("vc infer start time:", t1) p_len = audio0.shape[0]//self.window if(feats.shape[1] Date: Mon, 10 Apr 2023 18:46:58 +0800 Subject: [PATCH 3/8] edit README --- README.md | 4 ++++ README_en.md | 3 +++ requirements.txt | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b93d9dc..2de4373 100644 --- a/README.md +++ b/README.md @@ -47,9 +47,13 @@ poetry install ``` 你也可以通过pip来安装依赖: + +**注意**: `MacOS`下`faiss 1.7.2`版本会导致抛出段错误,请将`requirements.txt`的对应条目改为`faiss-cpu==1.7.0` + ```bash pip install -r requirements.txt ``` + ## 其他预模型准备 RVC需要其他的一些预模型来推理和训练。 diff --git a/README_en.md b/README_en.md index 6fe55e8..8e14e6b 100644 --- a/README_en.md +++ b/README_en.md @@ -40,9 +40,12 @@ poetry install ``` You can also use pip to install the dependencies +**Notice**: `faiss 1.7.2` will raise Segmentation Fault: 11 under `MacOS`, please change corresponding line in `requirements.txt` to `faiss-cpu==1.7.0` + ```bash pip install -r requirements.txt ``` + ## Preparation of other Pre-models RVC requires other pre-models to infer and train. diff --git a/requirements.txt b/requirements.txt index 82c73f4..e00ebda 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ scipy==1.9.3 librosa==0.9.2 llvmlite==0.39.0 fairseq==0.12.2 -faiss-cpu==1.7.0 +faiss-cpu==1.7.2 gradio Cython future>=0.18.3 From 1c1ee8ebc15b260c9d560003c7d1fedd75059549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:51:15 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E7=BE=8E=E5=8C=96=E7=95=8C=E9=9D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2de4373..3c6b4a9 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,28 @@ -# Retrieval-based-Voice-Conversion-WebUI +
+ +

Retrieval-based-Voice-Conversion-WebUI

+一个基于VITS的简单易用的语音转换(变声器)框架。

[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI) +
+ [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) [![Licence](https://img.shields.io/github/license/liujing04/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/%E4%BD%BF%E7%94%A8%E9%9C%80%E9%81%B5%E5%AE%88%E7%9A%84%E5%8D%8F%E8%AE%AE-LICENSE.txt) [![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-blue.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/) -### 使用了RVC的实时语音转换 : [w-okada/voice-changer](https://github.com/w-okada/voice-changer) ------- +
-一个基于VITS的简单易用的语音转换(变声器)框架。 +------ [**更新日志**](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Changelog_CN.md) [**English**](./README_en.md) | [**中文简体**](./README.md) > 点此查看我们的[演示视频](https://www.bilibili.com/video/BV1pm4y1z7Gm/) ! + +> 使用了RVC的实时语音转换: [w-okada/voice-changer](https://github.com/w-okada/voice-changer) + ## 简介 本仓库具有以下特点: + 使用top1特征模型检索来杜绝音色泄漏; From 1ac11aee46176a8800d41eeba8d9769f13f50c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:52:30 +0800 Subject: [PATCH 5/8] add counter --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 3c6b4a9..5009de3 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@
+
+ [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) [![Licence](https://img.shields.io/github/license/liujing04/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/%E4%BD%BF%E7%94%A8%E9%9C%80%E9%81%B5%E5%AE%88%E7%9A%84%E5%8D%8F%E8%AE%AE-LICENSE.txt) [![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-blue.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/) From 342cb735bbdf77db59e4fe7ea8ac2a3bce4c8c71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:53:25 +0800 Subject: [PATCH 6/8] edit README --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 5009de3..a2bc7ec 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,6 @@ [![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI) -
-
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) From 45d4daba2ce19362b429766af954e107bc1529c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:54:02 +0800 Subject: [PATCH 7/8] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2bc7ec..7e629d2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@

Retrieval-based-Voice-Conversion-WebUI

-一个基于VITS的简单易用的语音转换(变声器)框架。

+一个基于VITS的简单易用的语音转换(变声器)框架

[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI) From 1ad3f6ef1224c49b70371d1a963d190c09f54f7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Mon, 10 Apr 2023 18:55:04 +0800 Subject: [PATCH 8/8] remove old README --- README_v0.md | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 README_v0.md diff --git a/README_v0.md b/README_v0.md deleted file mode 100644 index 237679d..0000000 --- a/README_v0.md +++ /dev/null @@ -1,32 +0,0 @@ -# Retrieval-based-Voice-Conversion-WebUI - -[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) - -缺失的2个文件夹和2个文件: - -hubert_base.pt - -ffmpeg(自己确保ffmpeg命令能执行就行) - -pretrained文件夹 - -uvr5_weights文件夹 - -文件太大github传不动,去huggingface上下https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main - -当然你也可以直接看看RVC-beta.7z这个文件→_→ - -按照requirements.txt用pip装好环境,python infer-web.py就能用了 - -根据经验,librosa numpy和numba三个包最好写死版本否则容易有坑,其他的包版本不太重要 - -宣传视频:https://www.bilibili.com/video/BV1pm4y1z7Gm/ - -教程见小白简易教程.doc - -We will develop an English version windows WebUI APP in 2 weeks. - - -### Realtime Voice Conversion Software using RVC - -https://github.com/w-okada/voice-changer