mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-01-01 20:45:04 +08:00
Merge pull request #28 from fumiama/main
fix: MacOS 纯 CPU 推理时 Segmentation fault: 11
This commit is contained in:
commit
adade0e225
19
README.md
19
README.md
@ -1,21 +1,28 @@
|
||||
# Retrieval-based-Voice-Conversion-WebUI
|
||||
<div align="center">
|
||||
|
||||
<h1>Retrieval-based-Voice-Conversion-WebUI</h1>
|
||||
一个基于VITS的简单易用的语音转换(变声器)框架<br><br>
|
||||
|
||||
[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
|
||||
|
||||
<img src="https://counter.seku.su/cmoe?name=rvc&theme=r34" /><br>
|
||||
|
||||
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
|
||||
[![Licence](https://img.shields.io/github/license/liujing04/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/%E4%BD%BF%E7%94%A8%E9%9C%80%E9%81%B5%E5%AE%88%E7%9A%84%E5%8D%8F%E8%AE%AE-LICENSE.txt)
|
||||
[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-blue.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)
|
||||
|
||||
### 使用了RVC的实时语音转换 : [w-okada/voice-changer](https://github.com/w-okada/voice-changer)
|
||||
------
|
||||
</div>
|
||||
|
||||
一个基于VITS的简单易用的语音转换(变声器)框架。
|
||||
------
|
||||
|
||||
[**更新日志**](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Changelog_CN.md)
|
||||
|
||||
[**English**](./README_en.md) | [**中文简体**](./README.md)
|
||||
|
||||
> 点此查看我们的[演示视频](https://www.bilibili.com/video/BV1pm4y1z7Gm/) !
|
||||
|
||||
> 使用了RVC的实时语音转换: [w-okada/voice-changer](https://github.com/w-okada/voice-changer)
|
||||
|
||||
## 简介
|
||||
本仓库具有以下特点:
|
||||
+ 使用top1特征模型检索来杜绝音色泄漏;
|
||||
@ -47,9 +54,13 @@ poetry install
|
||||
```
|
||||
|
||||
你也可以通过pip来安装依赖:
|
||||
|
||||
**注意**: `MacOS`下`faiss 1.7.2`版本会导致抛出段错误,请将`requirements.txt`的对应条目改为`faiss-cpu==1.7.0`
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## 其他预模型准备
|
||||
RVC需要其他的一些预模型来推理和训练。
|
||||
|
||||
|
@ -40,9 +40,12 @@ poetry install
|
||||
```
|
||||
You can also use pip to install the dependencies:
|
||||
|
||||
**Notice**: `faiss 1.7.2` raises `Segmentation fault: 11` on `MacOS`. If you are affected, change the corresponding line in `requirements.txt` to `faiss-cpu==1.7.0`.
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Preparation of other Pre-models
|
||||
RVC requires other pre-models to infer and train.
|
||||
|
||||
|
32
README_v0.md
32
README_v0.md
@ -1,32 +0,0 @@
|
||||
# Retrieval-based-Voice-Conversion-WebUI
|
||||
|
||||
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
|
||||
|
||||
缺失的2个文件夹和2个文件:
|
||||
|
||||
hubert_base.pt
|
||||
|
||||
ffmpeg(自己确保ffmpeg命令能执行就行)
|
||||
|
||||
pretrained文件夹
|
||||
|
||||
uvr5_weights文件夹
|
||||
|
||||
文件太大github传不动,去huggingface上下https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main
|
||||
|
||||
当然你也可以直接看看RVC-beta.7z这个文件→_→
|
||||
|
||||
按照requirements.txt用pip装好环境,python infer-web.py就能用了
|
||||
|
||||
根据经验,librosa numpy和numba三个包最好写死版本否则容易有坑,其他的包版本不太重要
|
||||
|
||||
宣传视频:https://www.bilibili.com/video/BV1pm4y1z7Gm/
|
||||
|
||||
教程见小白简易教程.doc
|
||||
|
||||
We will release an English version of the Windows WebUI app within 2 weeks.
|
||||
|
||||
|
||||
### Realtime Voice Conversion Software using RVC
|
||||
|
||||
https://github.com/w-okada/voice-changer
|
@ -129,10 +129,10 @@
|
||||
"#@title 从谷歌云盘加载打包好的数据集到/content/dataset\n",
|
||||
"\n",
|
||||
"#@markdown 数据集位置\n",
|
||||
"DATASET = \"/content/drive/MyDrive/dataset/lulucall_48k.zip\" #@param {type:\"string\"}\n",
|
||||
"DATASET = \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" #@param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"!mkdir -p /content/dataset\n",
|
||||
"!unzip -d /content/dataset {DATASET}"
|
||||
"!unzip -d /content/dataset -B {DATASET}"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "Mwk7Q0Loqzjx"
|
||||
@ -140,13 +140,26 @@
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"#@title 重命名数据集中的重名文件\n",
|
||||
"!ls -a /content/dataset/\n",
|
||||
"!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "PDlFxWHWEynD"
|
||||
},
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"#@title 启动web\n",
|
||||
"%cd /content/Retrieval-based-Voice-Conversion-WebUI\n",
|
||||
"%load_ext tensorboard\n",
|
||||
"%tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
|
||||
"# %load_ext tensorboard\n",
|
||||
"# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n",
|
||||
"!python3 infer-web.py --colab --pycmd python3"
|
||||
],
|
||||
"metadata": {
|
||||
@ -164,7 +177,7 @@
|
||||
"#@markdown 模型名\n",
|
||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||
"#@markdown 模型epoch\n",
|
||||
"MODELEPOCH = 7500 #@param {type:\"integer\"}\n",
|
||||
"MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
|
||||
"\n",
|
||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n",
|
||||
@ -188,7 +201,7 @@
|
||||
"#@markdown 模型名\n",
|
||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||
"#@markdown 模型epoch\n",
|
||||
"MODELEPOCH = 6000 #@param {type:\"integer\"}\n",
|
||||
"MODELEPOCH = 7500 #@param {type:\"integer\"}\n",
|
||||
"\n",
|
||||
"!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n",
|
||||
"\n",
|
||||
@ -241,7 +254,7 @@
|
||||
"#@markdown 模型名\n",
|
||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||
"#@markdown 停止的epoch\n",
|
||||
"MODELEPOCH = 2500 #@param {type:\"integer\"}\n",
|
||||
"MODELEPOCH = 3200 #@param {type:\"integer\"}\n",
|
||||
"#@markdown 保存epoch间隔\n",
|
||||
"EPOCHSAVE = 100 #@param {type:\"integer\"}\n",
|
||||
"#@markdown 采样率\n",
|
||||
@ -262,7 +275,7 @@
|
||||
"#@markdown 模型名\n",
|
||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||
"#@markdown 选中模型epoch\n",
|
||||
"MODELEPOCH = 7700 #@param {type:\"integer\"}\n",
|
||||
"MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
|
||||
"\n",
|
||||
"!echo \"备份选中的模型。。。\"\n",
|
||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
||||
@ -292,7 +305,7 @@
|
||||
"#@markdown 模型名\n",
|
||||
"MODELNAME = \"lulu\" #@param {type:\"string\"}\n",
|
||||
"#@markdown 选中模型epoch\n",
|
||||
"MODELEPOCH = 7700 #@param {type:\"integer\"}\n",
|
||||
"MODELEPOCH = 9600 #@param {type:\"integer\"}\n",
|
||||
"\n",
|
||||
"!echo \"备份选中的模型。。。\"\n",
|
||||
"!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n",
|
||||
|
10
infer-web.py
10
infer-web.py
@ -9,7 +9,7 @@ import faiss
|
||||
ncpu=cpu_count()
|
||||
ngpu=torch.cuda.device_count()
|
||||
gpu_infos=[]
|
||||
if(torch.cuda.is_available()==False or ngpu==0):if_gpu_ok=False
|
||||
if((not torch.cuda.is_available()) or ngpu==0):if_gpu_ok=False
|
||||
else:
|
||||
if_gpu_ok = False
|
||||
for i in range(ngpu):
|
||||
@ -79,7 +79,7 @@ def vc_single(sid,input_audio,f0_up_key,f0_file,f0_method,file_index,file_big_np
|
||||
if(hubert_model==None):load_hubert()
|
||||
if_f0 = cpt.get("f0", 1)
|
||||
audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,file_big_npy,index_rate,if_f0,f0_file=f0_file)
|
||||
print(times)
|
||||
print("npy: ", times[0], "s, f0:", times[1], "s, infer: ", times[2], "s", sep='')
|
||||
return "Success", (tgt_sr, audio_opt)
|
||||
except:
|
||||
info=traceback.format_exc()
|
||||
@ -140,7 +140,7 @@ def uvr(model_name,inp_root,save_root_vocal,paths,save_root_ins):
|
||||
except:
|
||||
traceback.print_exc()
|
||||
print("clean_empty_cache")
|
||||
torch.cuda.empty_cache()
|
||||
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||
yield "\n".join(infos)
|
||||
|
||||
#一个选项卡全局只能有一个音色
|
||||
@ -152,7 +152,7 @@ def get_vc(sid):
|
||||
print("clean_empty_cache")
|
||||
del net_g, n_spk, vc, hubert_model,tgt_sr#,cpt
|
||||
hubert_model = net_g=n_spk=vc=hubert_model=tgt_sr=None
|
||||
torch.cuda.empty_cache()
|
||||
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||
###楼下不这么折腾清理不干净
|
||||
if_f0 = cpt.get("f0", 1)
|
||||
if (if_f0 == 1):
|
||||
@ -160,7 +160,7 @@ def get_vc(sid):
|
||||
else:
|
||||
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
||||
del net_g,cpt
|
||||
torch.cuda.empty_cache()
|
||||
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||
cpt=None
|
||||
return {"visible": False, "__type__": "update"}
|
||||
person = "%s/%s" % (weight_root, sid)
|
||||
|
@ -104,7 +104,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):##
|
||||
"padding_mask": padding_mask.to(device),
|
||||
"output_layer": 9, # layer 9
|
||||
}
|
||||
torch.cuda.synchronize()
|
||||
if torch.cuda.is_available(): torch.cuda.synchronize()
|
||||
t0=ttime()
|
||||
with torch.no_grad():
|
||||
logits = model.extract_features(**inputs)
|
||||
@ -116,13 +116,13 @@ for idx,name in enumerate(["冬之花clip1.wav",]):##
|
||||
feats = torch.from_numpy(big_npy[I.squeeze()].astype("float16")).unsqueeze(0).to(device)
|
||||
|
||||
feats=F.interpolate(feats.permute(0,2,1),scale_factor=2).permute(0,2,1)
|
||||
torch.cuda.synchronize()
|
||||
if torch.cuda.is_available(): torch.cuda.synchronize()
|
||||
t1=ttime()
|
||||
# p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存
|
||||
p_len = min(feats.shape[1],10000)#
|
||||
pitch, pitchf = get_f0(audio, p_len,f0_up_key)
|
||||
p_len = min(feats.shape[1],10000,pitch.shape[0])#太大了爆显存
|
||||
torch.cuda.synchronize()
|
||||
if torch.cuda.is_available(): torch.cuda.synchronize()
|
||||
t2=ttime()
|
||||
feats = feats[:,:p_len, :]
|
||||
pitch = pitch[:p_len]
|
||||
@ -133,7 +133,7 @@ for idx,name in enumerate(["冬之花clip1.wav",]):##
|
||||
pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device)
|
||||
with torch.no_grad():
|
||||
audio = net_g.infer(feats, p_len,pitch,pitchf,sid)[0][0, 0].data.cpu().float().numpy()#nsf
|
||||
torch.cuda.synchronize()
|
||||
if torch.cuda.is_available(): torch.cuda.synchronize()
|
||||
t3=ttime()
|
||||
ta0+=(t1-t0)
|
||||
ta1+=(t2-t1)
|
||||
|
@ -79,7 +79,7 @@ class VC(object):
|
||||
if(isinstance(index,type(None))==False and isinstance(big_npy,type(None))==False and index_rate!=0):
|
||||
npy = feats[0].cpu().numpy()
|
||||
if(self.is_half==True):npy=npy.astype("float32")
|
||||
D, I = index.search(npy, 1)
|
||||
_, I = index.search(npy, 1)
|
||||
npy=big_npy[I.squeeze()]
|
||||
if(self.is_half==True):npy=npy.astype("float16")
|
||||
feats = torch.from_numpy(npy).unsqueeze(0).to(self.device)*index_rate + (1-index_rate)*feats
|
||||
@ -99,7 +99,7 @@ class VC(object):
|
||||
else:
|
||||
audio1 = (net_g.infer(feats, p_len, sid)[0][0, 0] * 32768).data.cpu().float().numpy().astype(np.int16)
|
||||
del feats,p_len,padding_mask
|
||||
torch.cuda.empty_cache()
|
||||
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||
t2 = ttime()
|
||||
times[0] += (t1 - t0)
|
||||
times[2] += (t2 - t1)
|
||||
@ -160,5 +160,5 @@ class VC(object):
|
||||
audio_opt.append(self.vc(model,net_g,sid,audio_pad[t:],None,None,times,index,big_npy,index_rate)[self.t_pad_tgt:-self.t_pad_tgt])
|
||||
audio_opt=np.concatenate(audio_opt)
|
||||
del pitch,pitchf,sid
|
||||
torch.cuda.empty_cache()
|
||||
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||
return audio_opt
|
||||
|
Loading…
Reference in New Issue
Block a user