Compare commits

..

8 Commits

Author SHA1 Message Date
github-actions[bot]
25ec2c044b chore(format): run black on dev 2024-01-16 11:32:09 +00:00
github-actions[bot]
4e8e235024
chore(i18n): sync locale on dev (#1730)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-01-16 20:31:53 +09:00
Sơn Phan Trung
53051852f1
refactor(dlmodels.sh): get the script posix compliant, rework functions. (#1723) 2024-01-16 20:31:10 +09:00
Chengjia Jiang
49434901d8
fix: 多显卡训练时logger未赋值引用 (#1722)
modified:   infer/modules/train/train.py
2024-01-16 20:30:10 +09:00
Adachi
f6fa0c9cd9
typo: README.md (#1725) 2024-01-16 20:23:42 +09:00
yxlllc
c3e65cdf96
optimize: realtime inference (#1693)
* update real-time gui

* update real-time gui

* update real-time gui
2024-01-16 20:22:55 +09:00
NightWatcher314
26e2805f0e
fix poetry part for README.md (#1715)
* Update README.md

* Delete pyproject.toml

* Delete poetry.lock

* Update README.md
2024-01-14 17:04:30 +09:00
github-actions[bot]
4f7b6b2b9e
chore(format): run black on dev (#1717)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-01-14 16:43:14 +09:00
20 changed files with 275 additions and 4363 deletions

5
.gitignore vendored
View File

@ -21,3 +21,8 @@ rmvpe.pt
# To set a Python version for the project
.tool-versions
/runtime
/assets/weights/*
ffmpeg.*
ffprobe.*

View File

@ -93,9 +93,12 @@ pip install -r requirements-ipex.txt
```bash
curl -sSL https://install.python-poetry.org | python3 -
```
通过poetry安装依赖
通过 Poetry 安装依赖时,Python 建议使用 3.7-3.10 版本,其余版本在安装 llvmlite==0.39.0 时会出现冲突
```bash
poetry install
poetry init -n
poetry env use "path to your python.exe"
poetry run pip install -r requirements.txt
```
### MacOS
@ -133,7 +136,7 @@ sudo apt install ffmpeg
```bash
brew install ffmpeg
```
#### Windwos 用户
#### Windows 用户
下载后放置在根目录。
- 下载[ffmpeg.exe](https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe)
@ -174,6 +177,12 @@ sudo usermod -aG video $USERNAME
```bash
python infer-web.py
```
若先前使用 Poetry 安装依赖则可以通过以下方式启动WebUI
```bash
poetry run python infer-web.py
```
### 使用整合包
下载并解压`RVC-beta.7z`
#### Windows 用户

View File

@ -1 +1 @@
{"pth_path": "assets/weights/kikiV1.pth", "index_path": "logs/kikiV1.index", "sg_input_device": "VoiceMeeter Output (VB-Audio Vo (MME)", "sg_output_device": "VoiceMeeter Input (VB-Audio Voi (MME)", "sr_type": "sr_model", "threhold": -60.0, "pitch": 12.0, "rms_mix_rate": 0.5, "index_rate": 0.0, "block_time": 0.2, "crossfade_length": 0.08, "extra_time": 2.00, "n_cpu": 4.0, "use_jit": false, "use_pv": false, "f0method": "fcpe"}
{"pth_path": "assets/weights/kikiV1.pth", "index_path": "logs/kikiV1.index", "sg_hostapi": "MME", "sg_wasapi_exclusive": false, "sg_input_device": "VoiceMeeter Output (VB-Audio Vo", "sg_output_device": "VoiceMeeter Input (VB-Audio Voi", "sr_type": "sr_device", "threhold": -60.0, "pitch": 12.0, "rms_mix_rate": 0.5, "index_rate": 0.0, "block_time": 0.15, "crossfade_length": 0.08, "extra_time": 2.0, "n_cpu": 4.0, "use_jit": false, "use_pv": false, "f0method": "fcpe"}

186
gui_v1.py
View File

@ -125,6 +125,8 @@ if __name__ == "__main__":
self.index_rate: float = 0.0
self.n_cpu: int = min(n_cpu, 4)
self.f0method: str = "fcpe"
self.sg_hostapi: str = ""
self.wasapi_exclusive: bool = False
self.sg_input_device: str = ""
self.sg_output_device: str = ""
@ -134,6 +136,7 @@ if __name__ == "__main__":
self.config = Config()
self.function = "vc"
self.delay_time = 0
self.hostapis = None
self.input_devices = None
self.output_devices = None
self.input_devices_indices = None
@ -153,11 +156,25 @@ if __name__ == "__main__":
data["crepe"] = data["f0method"] == "crepe"
data["rmvpe"] = data["f0method"] == "rmvpe"
data["fcpe"] = data["f0method"] == "fcpe"
if data["sg_input_device"] not in self.input_devices:
if data["sg_hostapi"] in self.hostapis:
self.update_devices(hostapi_name=data["sg_hostapi"])
if (
data["sg_input_device"] not in self.input_devices
or data["sg_output_device"] not in self.output_devices
):
self.update_devices()
data["sg_hostapi"] = self.hostapis[0]
data["sg_input_device"] = self.input_devices[
self.input_devices_indices.index(sd.default.device[0])
]
data["sg_output_device"] = self.output_devices[
self.output_devices_indices.index(sd.default.device[1])
]
else:
data["sg_hostapi"] = self.hostapis[0]
data["sg_input_device"] = self.input_devices[
self.input_devices_indices.index(sd.default.device[0])
]
if data["sg_output_device"] not in self.output_devices:
data["sg_output_device"] = self.output_devices[
self.output_devices_indices.index(sd.default.device[1])
]
@ -166,6 +183,8 @@ if __name__ == "__main__":
data = {
"pth_path": "",
"index_path": "",
"sg_hostapi": self.hostapis[0],
"sg_wasapi_exclusive": False,
"sg_input_device": self.input_devices[
self.input_devices_indices.index(sd.default.device[0])
],
@ -233,12 +252,30 @@ if __name__ == "__main__":
[
sg.Frame(
layout=[
[
sg.Text(i18n("设备类型")),
sg.Combo(
self.hostapis,
key="sg_hostapi",
default_value=data.get("sg_hostapi", ""),
enable_events=True,
size=(20, 1),
),
sg.Checkbox(
i18n("独占 WASAPI 设备"),
key="sg_wasapi_exclusive",
default=data.get("sg_wasapi_exclusive", False),
enable_events=True,
),
],
[
sg.Text(i18n("输入设备")),
sg.Combo(
self.input_devices,
key="sg_input_device",
default_value=data.get("sg_input_device", ""),
enable_events=True,
size=(45, 1),
),
],
[
@ -247,6 +284,8 @@ if __name__ == "__main__":
self.output_devices,
key="sg_output_device",
default_value=data.get("sg_output_device", ""),
enable_events=True,
size=(45, 1),
),
],
[
@ -269,7 +308,7 @@ if __name__ == "__main__":
sg.Text("", key="sr_stream"),
],
],
title=i18n("音频设备(请使用同种类驱动)"),
title=i18n("音频设备"),
)
],
[
@ -365,7 +404,7 @@ if __name__ == "__main__":
[
sg.Text(i18n("采样长度")),
sg.Slider(
range=(0.02, 2.4),
range=(0.02, 1.5),
key="block_time",
resolution=0.01,
orientation="h",
@ -481,8 +520,13 @@ if __name__ == "__main__":
if event == sg.WINDOW_CLOSED:
self.stop_stream()
exit()
if event == "reload_devices":
self.update_devices()
if event == "reload_devices" or event == "sg_hostapi":
self.gui_config.sg_hostapi = values["sg_hostapi"]
self.update_devices(hostapi_name=values["sg_hostapi"])
if self.gui_config.sg_hostapi not in self.hostapis:
self.gui_config.sg_hostapi = self.hostapis[0]
self.window["sg_hostapi"].Update(values=self.hostapis)
self.window["sg_hostapi"].Update(value=self.gui_config.sg_hostapi)
if self.gui_config.sg_input_device not in self.input_devices:
self.gui_config.sg_input_device = self.input_devices[0]
self.window["sg_input_device"].Update(values=self.input_devices)
@ -502,6 +546,8 @@ if __name__ == "__main__":
settings = {
"pth_path": values["pth_path"],
"index_path": values["index_path"],
"sg_hostapi": values["sg_hostapi"],
"sg_wasapi_exclusive": values["sg_wasapi_exclusive"],
"sg_input_device": values["sg_input_device"],
"sg_output_device": values["sg_output_device"],
"sr_type": ["sr_model", "sr_device"][
@ -544,7 +590,9 @@ if __name__ == "__main__":
if values["I_noise_reduce"]:
self.delay_time += min(values["crossfade_length"], 0.04)
self.window["sr_stream"].update(self.gui_config.samplerate)
self.window["delay_time"].update(int(self.delay_time * 1000))
self.window["delay_time"].update(
int(np.round(self.delay_time * 1000))
)
# Parameter hot update
if event == "threhold":
self.gui_config.threhold = values["threhold"]
@ -566,7 +614,9 @@ if __name__ == "__main__":
self.delay_time += (
1 if values["I_noise_reduce"] else -1
) * min(values["crossfade_length"], 0.04)
self.window["delay_time"].update(int(self.delay_time * 1000))
self.window["delay_time"].update(
int(np.round(self.delay_time * 1000))
)
elif event == "O_noise_reduce":
self.gui_config.O_noise_reduce = values["O_noise_reduce"]
elif event == "use_pv":
@ -594,6 +644,8 @@ if __name__ == "__main__":
self.set_devices(values["sg_input_device"], values["sg_output_device"])
self.config.use_jit = False # values["use_jit"]
# self.device_latency = values["device_latency"]
self.gui_config.sg_hostapi = values["sg_hostapi"]
self.gui_config.sg_wasapi_exclusive = values["sg_wasapi_exclusive"]
self.gui_config.sg_input_device = values["sg_input_device"]
self.gui_config.sg_output_device = values["sg_output_device"]
self.gui_config.pth_path = values["pth_path"]
@ -644,6 +696,7 @@ if __name__ == "__main__":
if self.gui_config.sr_type == "sr_model"
else self.get_device_samplerate()
)
self.gui_config.channels = self.get_device_channels()
self.zc = self.gui_config.samplerate // 100
self.block_frame = (
int(
@ -686,19 +739,18 @@ if __name__ == "__main__":
device=self.config.device,
dtype=torch.float32,
)
self.input_wav_denoise: torch.Tensor = self.input_wav.clone()
self.input_wav_res: torch.Tensor = torch.zeros(
160 * self.input_wav.shape[0] // self.zc,
device=self.config.device,
dtype=torch.float32,
)
self.rms_buffer: np.ndarray = np.zeros(4 * self.zc, dtype="float32")
self.sola_buffer: torch.Tensor = torch.zeros(
self.sola_buffer_frame, device=self.config.device, dtype=torch.float32
)
self.nr_buffer: torch.Tensor = self.sola_buffer.clone()
self.output_buffer: torch.Tensor = self.input_wav.clone()
self.res_buffer: torch.Tensor = torch.zeros(
2 * self.zc, device=self.config.device, dtype=torch.float32
)
self.skip_head = self.extra_frame // self.zc
self.return_length = (
self.block_frame + self.sola_buffer_frame + self.sola_search_frame
@ -740,13 +792,20 @@ if __name__ == "__main__":
global flag_vc
if not flag_vc:
flag_vc = True
channels = 1 if sys.platform == "darwin" else 2
if (
"WASAPI" in self.gui_config.sg_hostapi
and self.gui_config.sg_wasapi_exclusive
):
extra_settings = sd.WasapiSettings(exclusive=True)
else:
extra_settings = None
self.stream = sd.Stream(
channels=channels,
callback=self.audio_callback,
blocksize=self.block_frame,
samplerate=self.gui_config.samplerate,
channels=self.gui_config.channels,
dtype="float32",
extra_settings=extra_settings,
)
self.stream.start()
@ -755,7 +814,7 @@ if __name__ == "__main__":
if flag_vc:
flag_vc = False
if self.stream is not None:
self.stream.stop()
self.stream.abort()
self.stream.close()
self.stream = None
@ -769,48 +828,54 @@ if __name__ == "__main__":
start_time = time.perf_counter()
indata = librosa.to_mono(indata.T)
if self.gui_config.threhold > -60:
indata = np.append(self.rms_buffer, indata)
rms = librosa.feature.rms(
y=indata, frame_length=4 * self.zc, hop_length=self.zc
)
)[:, 2:]
self.rms_buffer[:] = indata[-4 * self.zc :]
indata = indata[2 * self.zc - self.zc // 2 :]
db_threhold = (
librosa.amplitude_to_db(rms, ref=1.0)[0] < self.gui_config.threhold
)
for i in range(db_threhold.shape[0]):
if db_threhold[i]:
indata[i * self.zc : (i + 1) * self.zc] = 0
indata = indata[self.zc // 2 :]
self.input_wav[: -self.block_frame] = self.input_wav[
self.block_frame :
].clone()
self.input_wav[-self.block_frame :] = torch.from_numpy(indata).to(
self.input_wav[-indata.shape[0] :] = torch.from_numpy(indata).to(
self.config.device
)
self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[
self.block_frame_16k :
].clone()
# input noise reduction and resampling
if self.gui_config.I_noise_reduce and self.function == "vc":
input_wav = self.input_wav[
-self.sola_buffer_frame - self.block_frame - 2 * self.zc :
]
if self.gui_config.I_noise_reduce:
self.input_wav_denoise[: -self.block_frame] = self.input_wav_denoise[
self.block_frame :
].clone()
input_wav = self.input_wav[-self.sola_buffer_frame - self.block_frame :]
input_wav = self.tg(
input_wav.unsqueeze(0), self.input_wav.unsqueeze(0)
)[0, 2 * self.zc :]
).squeeze(0)
input_wav[: self.sola_buffer_frame] *= self.fade_in_window
input_wav[: self.sola_buffer_frame] += (
self.nr_buffer * self.fade_out_window
)
self.input_wav_denoise[-self.block_frame :] = input_wav[
: self.block_frame
]
self.nr_buffer[:] = input_wav[self.block_frame :]
input_wav = torch.cat(
(self.res_buffer[:], input_wav[: self.block_frame])
)
self.res_buffer[:] = input_wav[-2 * self.zc :]
self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(
input_wav
self.input_wav_denoise[-self.block_frame - 2 * self.zc :]
)[160:]
else:
self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(
self.input_wav[-self.block_frame - 2 * self.zc :]
)[160:]
self.input_wav_res[
-160 * (indata.shape[0] // self.zc + 1) :
] = self.resampler(self.input_wav[-indata.shape[0] - 2 * self.zc :])[
160:
]
# infer
if self.function == "vc":
infer_wav = self.rvc.infer(
@ -822,14 +887,12 @@ if __name__ == "__main__":
)
if self.resampler2 is not None:
infer_wav = self.resampler2(infer_wav)
elif self.gui_config.I_noise_reduce:
infer_wav = self.input_wav_denoise[self.extra_frame :].clone()
else:
infer_wav = self.input_wav[
-self.crossfade_frame - self.sola_search_frame - self.block_frame :
].clone()
infer_wav = self.input_wav[self.extra_frame :].clone()
# output noise reduction
if (self.gui_config.O_noise_reduce and self.function == "vc") or (
self.gui_config.I_noise_reduce and self.function == "im"
):
if self.gui_config.O_noise_reduce and self.function == "vc":
self.output_buffer[: -self.block_frame] = self.output_buffer[
self.block_frame :
].clone()
@ -839,16 +902,14 @@ if __name__ == "__main__":
).squeeze(0)
# volume envelop mixing
if self.gui_config.rms_mix_rate < 1 and self.function == "vc":
if self.gui_config.I_noise_reduce:
input_wav = self.input_wav_denoise[self.extra_frame :]
else:
input_wav = self.input_wav[self.extra_frame :]
rms1 = librosa.feature.rms(
y=self.input_wav_res[
160
* self.skip_head : 160
* (self.skip_head + self.return_length)
]
.cpu()
.numpy(),
frame_length=640,
hop_length=160,
y=input_wav[: infer_wav.shape[0]].cpu().numpy(),
frame_length=4 * self.zc,
hop_length=self.zc,
)
rms1 = torch.from_numpy(rms1).to(self.config.device)
rms1 = F.interpolate(
@ -907,19 +968,22 @@ if __name__ == "__main__":
self.sola_buffer[:] = infer_wav[
self.block_frame : self.block_frame + self.sola_buffer_frame
]
if sys.platform == "darwin":
outdata[:] = infer_wav[: self.block_frame].cpu().numpy()[:, np.newaxis]
else:
outdata[:] = (
infer_wav[: self.block_frame].repeat(2, 1).t().cpu().numpy()
infer_wav[: self.block_frame]
.repeat(self.gui_config.channels, 1)
.t()
.cpu()
.numpy()
)
total_time = time.perf_counter() - start_time
if flag_vc:
self.window["infer_time"].update(int(total_time * 1000))
printt("Infer time: %.2f", total_time)
def update_devices(self):
def update_devices(self, hostapi_name=None):
"""获取设备列表"""
global flag_vc
flag_vc = False
sd._terminate()
sd._initialize()
devices = sd.query_devices()
@ -927,25 +991,28 @@ if __name__ == "__main__":
for hostapi in hostapis:
for device_idx in hostapi["devices"]:
devices[device_idx]["hostapi_name"] = hostapi["name"]
self.hostapis = [hostapi["name"] for hostapi in hostapis]
if hostapi_name not in self.hostapis:
hostapi_name = self.hostapis[0]
self.input_devices = [
f"{d['name']} ({d['hostapi_name']})"
d["name"]
for d in devices
if d["max_input_channels"] > 0
if d["max_input_channels"] > 0 and d["hostapi_name"] == hostapi_name
]
self.output_devices = [
f"{d['name']} ({d['hostapi_name']})"
d["name"]
for d in devices
if d["max_output_channels"] > 0
if d["max_output_channels"] > 0 and d["hostapi_name"] == hostapi_name
]
self.input_devices_indices = [
d["index"] if "index" in d else d["name"]
for d in devices
if d["max_input_channels"] > 0
if d["max_input_channels"] > 0 and d["hostapi_name"] == hostapi_name
]
self.output_devices_indices = [
d["index"] if "index" in d else d["name"]
for d in devices
if d["max_output_channels"] > 0
if d["max_output_channels"] > 0 and d["hostapi_name"] == hostapi_name
]
def set_devices(self, input_device, output_device):
@ -964,4 +1031,13 @@ if __name__ == "__main__":
sd.query_devices(device=sd.default.device[0])["default_samplerate"]
)
def get_device_channels(self):
max_input_channels = sd.query_devices(device=sd.default.device[0])[
"max_input_channels"
]
max_output_channels = sd.query_devices(device=sd.default.device[1])[
"max_output_channels"
]
return min(max_input_channels, max_output_channels, 2)
gui = GUI()

View File

@ -90,6 +90,7 @@
"版本": "Version",
"特征提取": "Feature extraction",
"特征检索库文件路径,为空则使用下拉的选择结果": "Path to the feature index file. Leave blank to use the selected result from the dropdown:",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Recommended +12 key for male to female conversion, and -12 key for female to male conversion. If the sound range goes too far and the voice is distorted, you can also adjust it to the appropriate range by yourself.",
"目标采样率": "Target sample rate:",
"算法延迟(ms):": "Algorithmic delays(ms):",
@ -101,6 +102,7 @@
"训练模型": "Train model",
"训练特征索引": "Train feature index",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Training complete. You can check the training logs in the console or the 'train.log' file under the experiment folder.",
"设备类型": "设备类型",
"请指定说话人id": "Please specify the speaker/singer ID:",
"请选择index文件": "Please choose the .index file",
"请选择pth文件": "Please choose the .pth file",
@ -129,7 +131,7 @@
"采样长度": "Sample length",
"重载设备列表": "Reload device list",
"音调设置": "Pitch settings",
"音频设备(请使用同种类驱动)": "Audio device (please use the same type of driver)",
"音频设备": "Audio device",
"音高算法": "pitch detection algorithm",
"额外推理时长": "Extra inference time"
}

View File

@ -90,6 +90,7 @@
"版本": "Versión",
"特征提取": "Extracción de características",
"特征检索库文件路径,为空则使用下拉的选择结果": "Ruta del archivo de la biblioteca de características, si está vacío, se utilizará el resultado de la selección desplegable",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Tecla +12 recomendada para conversión de voz de hombre a mujer, tecla -12 para conversión de voz de mujer a hombre. Si el rango de tono es demasiado amplio y causa distorsión, ajústelo usted mismo a un rango adecuado.",
"目标采样率": "Tasa de muestreo objetivo",
"算法延迟(ms):": "算法延迟(ms):",
@ -101,6 +102,7 @@
"训练模型": "Entrenar Modelo",
"训练特征索引": "Índice de características",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Entrenamiento finalizado, puede ver el registro de entrenamiento en la consola o en el archivo train.log en la carpeta del experimento",
"设备类型": "设备类型",
"请指定说话人id": "ID del modelo",
"请选择index文件": "Seleccione el archivo .index",
"请选择pth文件": "Seleccione el archivo .pth",
@ -129,7 +131,7 @@
"采样长度": "Longitud de muestreo",
"重载设备列表": "Actualizar lista de dispositivos",
"音调设置": "Ajuste de tono",
"音频设备(请使用同种类驱动)": "Dispositivo de audio (utilice el mismo tipo de controlador)",
"音频设备": "Dispositivo de audio",
"音高算法": "Algoritmo de tono",
"额外推理时长": "Tiempo de inferencia adicional"
}

View File

@ -90,6 +90,7 @@
"版本": "Version",
"特征提取": "Extraction des caractéristiques",
"特征检索库文件路径,为空则使用下拉的选择结果": "Chemin d'accès au fichier d'index des caractéristiques. Laisser vide pour utiliser le résultat sélectionné dans la liste déroulante :",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Il est recommandé d'utiliser la clé +12 pour la conversion homme-femme et la clé -12 pour la conversion femme-homme. Si la plage sonore est trop large et que la voix est déformée, vous pouvez également l'ajuster vous-même à la plage appropriée.",
"目标采样率": "Taux d'échantillonnage cible :",
"算法延迟(ms):": "Délais algorithmiques (ms):",
@ -101,6 +102,7 @@
"训练模型": "Entraîner le modèle",
"训练特征索引": "Entraîner l'index des caractéristiques",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Entraînement terminé. Vous pouvez consulter les rapports d'entraînement dans la console ou dans le fichier 'train.log' situé dans le dossier de l'expérience.",
"设备类型": "设备类型",
"请指定说话人id": "Veuillez spécifier l'ID de l'orateur ou du chanteur :",
"请选择index文件": "Veuillez sélectionner le fichier d'index",
"请选择pth文件": "Veuillez sélectionner le fichier pth",
@ -129,7 +131,7 @@
"采样长度": "Longueur de l'échantillon",
"重载设备列表": "Recharger la liste des dispositifs",
"音调设置": "Réglages de la hauteur",
"音频设备(请使用同种类驱动)": "Périphérique audio (veuillez utiliser le même type de pilote)",
"音频设备": "Périphérique audio",
"音高算法": "algorithme de détection de la hauteur",
"额外推理时长": "Temps d'inférence supplémentaire"
}

View File

@ -90,6 +90,7 @@
"版本": "Versione",
"特征提取": "Estrazione delle caratteristiche",
"特征检索库文件路径,为空则使用下拉的选择结果": "Percorso del file di indice delle caratteristiche. ",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Tonalità +12 consigliata per la conversione da maschio a femmina e tonalità -12 per la conversione da femmina a maschio. ",
"目标采样率": "Frequenza di campionamento target:",
"算法延迟(ms):": "算法延迟(ms):",
@ -101,6 +102,7 @@
"训练模型": "Addestra modello",
"训练特征索引": "Addestra indice delle caratteristiche",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Addestramento completato. ",
"设备类型": "设备类型",
"请指定说话人id": "Si prega di specificare l'ID del locutore/cantante:",
"请选择index文件": "请选择index文件",
"请选择pth文件": "请选择pth 文件",
@ -129,7 +131,7 @@
"采样长度": "Lunghezza del campione",
"重载设备列表": "Ricaricare l'elenco dei dispositivi",
"音调设置": "Impostazioni del tono",
"音频设备(请使用同种类驱动)": "Dispositivo audio (utilizzare lo stesso tipo di driver)",
"音频设备": "Dispositivo audio",
"音高算法": "音高算法",
"额外推理时长": "Tempo di inferenza extra"
}

View File

@ -90,6 +90,7 @@
"版本": "バージョン",
"特征提取": "特徴抽出",
"特征检索库文件路径,为空则使用下拉的选择结果": "特徴検索ライブラリへのパス 空の場合はドロップダウンで選択",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "男性から女性へは+12キーをお勧めします。女性から男性へは-12キーをお勧めします。音域が広すぎて音質が劣化した場合は、適切な音域に自分で調整してください。",
"目标采样率": "目標サンプリングレート",
"算法延迟(ms):": "算法延迟(ms):",
@ -101,6 +102,7 @@
"训练模型": "モデルのトレーニング",
"训练特征索引": "特徴インデックスのトレーニング",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "トレーニング終了時に、トレーニングログやフォルダ内のtrain.logを確認することができます",
"设备类型": "设备类型",
"请指定说话人id": "話者IDを指定してください",
"请选择index文件": "indexファイルを選択してください",
"请选择pth文件": "pthファイルを選択してください",
@ -129,7 +131,7 @@
"采样长度": "サンプル長",
"重载设备列表": "デバイスリストをリロードする",
"音调设置": "音程設定",
"音频设备(请使用同种类驱动)": "オーディオデバイス(同じ種類のドライバーを使用してください)",
"音频设备": "オーディオデバイス",
"音高算法": "ピッチアルゴリズム",
"额外推理时长": "追加推論時間"
}

View File

@ -90,6 +90,7 @@
"版本": "Версия архитектуры модели:",
"特征提取": "Извлечь черты",
"特征检索库文件路径,为空则使用下拉的选择结果": "Путь к файлу индекса черт. Оставьте пустым, чтобы использовать выбранный вариант из списка ниже:",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Рекомендуется выбрать +12 для конвертирования мужского голоса в женский и -12 для конвертирования женского в мужской. Если диапазон голоса слишком велик, и голос искажается, можно выбрать значение на свой вкус.",
"目标采样率": "Частота дискретизации аудио:",
"算法延迟(ms):": "算法延迟(ms):",
@ -101,6 +102,7 @@
"训练模型": "Обучить модель",
"训练特征索引": "Обучить индекс черт",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Обучение модели завершено. Журнал обучения можно просмотреть в консоли или в файле 'train.log' в папке с моделью.",
"设备类型": "设备类型",
"请指定说话人id": "Номер говорящего/поющего:",
"请选择index文件": "Пожалуйста, выберите файл индекса",
"请选择pth文件": "Пожалуйста, выберите файл pth",
@ -129,7 +131,7 @@
"采样长度": "Длина сэмпла",
"重载设备列表": "Обновить список устройств",
"音调设置": "Настройка высоты звука",
"音频设备(请使用同种类驱动)": "Аудиоустройство (пожалуйста, используйте такой же тип драйвера)",
"音频设备": "Аудиоустройство",
"音高算法": "Алгоритм оценки высоты звука",
"额外推理时长": "Доп. время переработки"
}

View File

@ -90,6 +90,7 @@
"版本": "Sürüm",
"特征提取": "Özellik çıkartma",
"特征检索库文件路径,为空则使用下拉的选择结果": "Özellik indeksi dosyasının yolunu belirtin. Seçilen sonucu kullanmak için boş bırakın veya açılır menüden seçim yapın.",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Erkekten kadına çevirmek için +12 tuş önerilir, kadından erkeğe çevirmek için ise -12 tuş önerilir. Eğer ses aralığı çok fazla genişler ve ses bozulursa, isteğe bağlı olarak uygun aralığa kendiniz de ayarlayabilirsiniz.",
"目标采样率": "Hedef örnekleme oranı:",
"算法延迟(ms):": "算法延迟(ms):",
@ -101,6 +102,7 @@
"训练模型": "Modeli Eğit",
"训练特征索引": "Özellik Dizinini Eğit",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Eğitim tamamlandı. Eğitim günlüklerini konsolda veya deney klasörü altındaki train.log dosyasında kontrol edebilirsiniz.",
"设备类型": "设备类型",
"请指定说话人id": "Lütfen konuşmacı/sanatçı no belirtin:",
"请选择index文件": "Lütfen .index dosyası seçin",
"请选择pth文件": "Lütfen .pth dosyası seçin",
@ -129,7 +131,7 @@
"采样长度": "Örnekleme uzunluğu",
"重载设备列表": "Cihaz listesini yeniden yükle",
"音调设置": "Pitch ayarları",
"音频设备(请使用同种类驱动)": "Ses cihazı (aynı tür sürücüyü kullanın)",
"音频设备": "Ses cihazı",
"音高算法": "音高算法",
"额外推理时长": "Ekstra çıkartma süresi"
}

View File

@ -90,6 +90,7 @@
"版本": "版本",
"特征提取": "特征提取",
"特征检索库文件路径,为空则使用下拉的选择结果": "特征检索库文件路径,为空则使用下拉的选择结果",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ",
"目标采样率": "目标采样率",
"算法延迟(ms):": "算法延迟(ms):",
@ -101,6 +102,7 @@
"训练模型": "训练模型",
"训练特征索引": "训练特征索引",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"设备类型": "设备类型",
"请指定说话人id": "请指定说话人id",
"请选择index文件": "请选择index文件",
"请选择pth文件": "请选择pth文件",
@ -129,7 +131,7 @@
"采样长度": "采样长度",
"重载设备列表": "重载设备列表",
"音调设置": "音调设置",
"音频设备(请使用同种类驱动)": "音频设备(请使用同种类驱动)",
"音频设备": "音频设备",
"音高算法": "音高算法",
"额外推理时长": "额外推理时长"
}

View File

@ -90,6 +90,7 @@
"版本": "版本",
"特征提取": "特徵提取",
"特征检索库文件路径,为空则使用下拉的选择结果": "特徵檢索庫檔路徑,為空則使用下拉的選擇結果",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "男性轉女性推薦+12key女性轉男性推薦-12key如果音域爆炸導致音色失真也可以自己調整到合適音域。",
"目标采样率": "目標取樣率",
"算法延迟(ms):": "算法延迟(ms):",
@ -101,6 +102,7 @@
"训练模型": "訓練模型",
"训练特征索引": "訓練特徵索引",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"设备类型": "设备类型",
"请指定说话人id": "請指定說話人id",
"请选择index文件": "请选择index文件",
"请选择pth文件": "请选择pth文件",
@ -129,7 +131,7 @@
"采样长度": "取樣長度",
"重载设备列表": "重載設備列表",
"音调设置": "音調設定",
"音频设备(请使用同种类驱动)": "音訊設備 (請使用同種類驅動)",
"音频设备": "音訊設備",
"音高算法": "音高演算法",
"额外推理时长": "額外推理時長"
}

View File

@ -90,6 +90,7 @@
"版本": "版本",
"特征提取": "特徵提取",
"特征检索库文件路径,为空则使用下拉的选择结果": "特徵檢索庫檔路徑,為空則使用下拉的選擇結果",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "男性轉女性推薦+12key女性轉男性推薦-12key如果音域爆炸導致音色失真也可以自己調整到合適音域。",
"目标采样率": "目標取樣率",
"算法延迟(ms):": "算法延迟(ms):",
@ -101,6 +102,7 @@
"训练模型": "訓練模型",
"训练特征索引": "訓練特徵索引",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"设备类型": "设备类型",
"请指定说话人id": "請指定說話人id",
"请选择index文件": "请选择index文件",
"请选择pth文件": "请选择pth文件",
@ -129,7 +131,7 @@
"采样长度": "取樣長度",
"重载设备列表": "重載設備列表",
"音调设置": "音調設定",
"音频设备(请使用同种类驱动)": "音訊設備 (請使用同種類驅動)",
"音频设备": "音訊設備",
"音高算法": "音高演算法",
"额外推理时长": "額外推理時長"
}

View File

@ -90,6 +90,7 @@
"版本": "版本",
"特征提取": "特徵提取",
"特征检索库文件路径,为空则使用下拉的选择结果": "特徵檢索庫檔路徑,為空則使用下拉的選擇結果",
"独占 WASAPI 设备": "独占 WASAPI 设备",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "男性轉女性推薦+12key女性轉男性推薦-12key如果音域爆炸導致音色失真也可以自己調整到合適音域。",
"目标采样率": "目標取樣率",
"算法延迟(ms):": "算法延迟(ms):",
@ -101,6 +102,7 @@
"训练模型": "訓練模型",
"训练特征索引": "訓練特徵索引",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"设备类型": "设备类型",
"请指定说话人id": "請指定說話人id",
"请选择index文件": "请选择index文件",
"请选择pth文件": "请选择pth文件",
@ -129,7 +131,7 @@
"采样长度": "取樣長度",
"重载设备列表": "重載設備列表",
"音调设置": "音調設定",
"音频设备(请使用同种类驱动)": "音訊設備 (請使用同種類驅動)",
"音频设备": "音訊設備",
"音高算法": "音高演算法",
"额外推理时长": "額外推理時長"
}

View File

@ -104,10 +104,11 @@ def main():
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = str(randint(20000, 55555))
children = []
logger = utils.get_logger(hps.model_dir)
for i in range(n_gpus):
subproc = mp.Process(
target=run,
args=(i, n_gpus, hps),
args=(i, n_gpus, hps, logger),
)
children.append(subproc)
subproc.start()
@ -116,14 +117,10 @@ def main():
children[i].join()
def run(
rank,
n_gpus,
hps,
):
def run(rank, n_gpus, hps, logger: logging.Logger):
global global_step
if rank == 0:
logger = utils.get_logger(hps.model_dir)
# logger = utils.get_logger(hps.model_dir)
logger.info(hps)
# utils.check_git_hash(hps.model_dir)
writer = SummaryWriter(log_dir=hps.model_dir)

3642
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,64 +0,0 @@
[tool.poetry]
name = "rvc-beta"
version = "0.1.0"
description = ""
authors = ["lj1995"]
license = "MIT"
[tool.poetry.dependencies]
python = "^3.8"
torch = "^2.0.0"
torchaudio = "^2.0.1"
Cython = "^0.29.34"
gradio = "^4.11.0"
future = "^0.18.3"
pydub = "^0.25.1"
soundfile = "^0.12.1"
ffmpeg-python = "^0.2.0"
tensorboardX = "^2.6"
functorch = "^2.0.0"
fairseq = "^0.12.2"
faiss-cpu = "^1.7.2"
Jinja2 = "^3.1.2"
json5 = "^0.9.11"
librosa = "0.9.1"
llvmlite = "0.39.0"
Markdown = "^3.4.3"
matplotlib = "^3.7.1"
matplotlib-inline = "^0.1.6"
numba = "0.56.4"
numpy = "1.23.5"
scipy = "1.9.3"
praat-parselmouth = "^0.4.3"
Pillow = "9.3.0"
pyworld = "^0.3.2"
resampy = "^0.4.2"
scikit-learn = "^1.2.2"
starlette = "^0.27.0"
tensorboard = "^2.12.1"
tensorboard-data-server = "^0.7.0"
tensorboard-plugin-wit = "^1.8.1"
torchgen = "^0.0.1"
tqdm = "^4.65.0"
tornado = "^6.3"
Werkzeug = "^2.2.3"
uc-micro-py = "^1.0.1"
sympy = "^1.11.1"
tabulate = "^0.9.0"
PyYAML = "^6.0"
pyasn1 = "^0.4.8"
pyasn1-modules = "^0.2.8"
fsspec = "^2023.3.0"
absl-py = "^1.4.0"
audioread = "^3.0.0"
uvicorn = "^0.21.1"
colorama = "^0.4.6"
torchcrepe = "0.0.20"
python-dotenv = "^1.0.0"
av = "^10.0.0"
[tool.poetry.dev-dependencies]
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

View File

@ -1,566 +1,81 @@
#!/bin/bash
#!/bin/sh
echo working dir is $(pwd)
echo downloading requirement aria2 check.
printf "working dir is %s\n" "$PWD"
echo "downloading requirement aria2 check."
if command -v aria2c &> /dev/null
if command -v aria2c > /dev/null 2>&1
then
echo "aria2c command found"
echo "aria2 command found"
else
echo failed. please install aria2
sleep 5
echo "failed. please install aria2"
exit 1
fi
d32="f0D32k.pth"
d40="f0D40k.pth"
d48="f0D48k.pth"
g32="f0G32k.pth"
g40="f0G40k.pth"
g48="f0G48k.pth"
echo "dir check start."
d40v2="f0D40k.pth"
g40v2="f0G40k.pth"
check_dir() {
[ -d "$1" ] && printf "dir %s checked\n" "$1" || \
printf "failed. generating dir %s\n" "$1" && mkdir -p "$1"
}
dld32="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth"
dld40="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth"
dld48="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth"
dlg32="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth"
dlg40="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth"
dlg48="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth"
check_dir "./assets/pretrained"
check_dir "./assets/pretrained_v2"
check_dir "./assets/uvr5_weights"
check_dir "./assets/uvr5_weights/onnx_dereverb_By_FoxJoy"
dld40v2="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth"
dlg40v2="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth"
echo "dir check finished."
hp2_all="HP2_all_vocals.pth"
hp3_all="HP3_all_vocals.pth"
hp5_only="HP5_only_main_vocal.pth"
VR_DeEchoAggressive="VR-DeEchoAggressive.pth"
VR_DeEchoDeReverb="VR-DeEchoDeReverb.pth"
VR_DeEchoNormal="VR-DeEchoNormal.pth"
onnx_dereverb="vocals.onnx"
rmvpe="rmvpe.pt"
dlhp2_all="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth"
dlhp3_all="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth"
dlhp5_only="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth"
dlVR_DeEchoAggressive="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth"
dlVR_DeEchoDeReverb="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth"
dlVR_DeEchoNormal="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth"
dlonnx_dereverb="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx"
dlrmvpe="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt"
hb="hubert_base.pt"
dlhb="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt"
echo dir check start.
if [ -d "./assets/pretrained" ]; then
echo dir ./assets/pretrained checked.
else
echo failed. generating dir ./assets/pretrained.
mkdir pretrained
fi
if [ -d "./assets/pretrained_v2" ]; then
echo dir ./assets/pretrained_v2 checked.
else
echo failed. generating dir ./assets/pretrained_v2.
mkdir pretrained_v2
fi
if [ -d "./assets/uvr5_weights" ]; then
echo dir ./assets/uvr5_weights checked.
else
echo failed. generating dir ./assets/uvr5_weights.
mkdir uvr5_weights
fi
if [ -d "./assets/uvr5_weights/onnx_dereverb_By_FoxJoy" ]; then
echo dir ./assets/uvr5_weights/onnx_dereverb_By_FoxJoy checked.
else
echo failed. generating dir ./assets/uvr5_weights/onnx_dereverb_By_FoxJoy.
mkdir uvr5_weights/onnx_dereverb_By_FoxJoy
fi
echo dir check finished.
echo required files check start.
echo checking D32k.pth
if [ -f "./assets/pretrained/D32k.pth" ]; then
echo D32k.pth in ./assets/pretrained checked.
echo "required files check start."
check_file_pretrained() {
printf "checking %s\n" "$2"
if [ -f "./assets/""$1""/""$2""" ]; then
printf "%s in ./assets/%s checked.\n" "$2" "$1"
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d ./assets/pretrained -o D32k.pth
if [ -f "./assets/pretrained/D32k.pth" ]; then
echo download successful.
if command -v aria2c > /dev/null 2>&1; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/"$1"/"$2" -d ./assets/"$1" -o "$2"
[ -f "./assets/""$1""/""$2""" ] && echo "download successful." || echo "please try again!" && exit 1
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
echo "aria2c command not found. Please install aria2c and try again."
exit 1
fi
fi
}
echo checking D40k.pth
if [ -f "./assets/pretrained/D40k.pth" ]; then
echo D40k.pth in ./assets/pretrained checked.
check_file_special() {
printf "checking %s\n" "$2"
if [ -f "./assets/""$1""/""$2""" ]; then
printf "%s in ./assets/%s checked.\n" "$2" "$1"
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d ./assets/pretrained -o D40k.pth
if [ -f "./assets/pretrained/D40k.pth" ]; then
echo download successful.
if command -v aria2c > /dev/null 2>&1; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/"$2" -d ./assets/"$1" -o "$2"
[ -f "./assets/""$1""/""$2""" ] && echo "download successful." || echo "please try again!" && exit 1
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
echo "aria2c command not found. Please install aria2c and try again."
exit 1
fi
fi
}
echo checking D40k.pth
if [ -f "./assets/pretrained_v2/D40k.pth" ]; then
echo D40k.pth in ./assets/pretrained_v2 checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d ./assets/pretrained_v2 -o D40k.pth
if [ -f "./assets/pretrained_v2/D40k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
check_file_pretrained pretrained D32k.pth
check_file_pretrained pretrained D40k.pth
check_file_pretrained pretrained D48k.pth
check_file_pretrained pretrained G32k.pth
check_file_pretrained pretrained G40k.pth
check_file_pretrained pretrained G48k.pth
check_file_pretrained pretrained_v2 f0D40k.pth
check_file_pretrained pretrained_v2 f0G40k.pth
check_file_pretrained pretrained_v2 D40k.pth
check_file_pretrained pretrained_v2 G40k.pth
check_file_pretrained uvr5_weights HP2_all_vocals.pth
check_file_pretrained uvr5_weights HP3_all_vocals.pth
check_file_pretrained uvr5_weights HP5_only_main_vocal.pth
check_file_pretrained uvr5_weights VR-DeEchoAggressive.pth
check_file_pretrained uvr5_weights VR-DeEchoDeReverb.pth
check_file_pretrained uvr5_weights VR-DeEchoNormal.pth
check_file_pretrained uvr5_weights "onnx_dereverb_By_FoxJoy/vocals.onnx"
check_file_special rmvpe rmvpe.pt
check_file_special hubert hubert_base.pt
echo checking D48k.pth
if [ -f "./assets/pretrained/D48k.pth" ]; then
echo D48k.pth in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d ./assets/pretrained -o D48k.pth
if [ -f "./assets/pretrained/D48k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking G32k.pth
if [ -f "./assets/pretrained/G32k.pth" ]; then
echo G32k.pth in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d ./assets/pretrained -o G32k.pth
if [ -f "./assets/pretrained/G32k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking G40k.pth
if [ -f "./assets/pretrained/G40k.pth" ]; then
echo G40k.pth in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d ./assets/pretrained -o G40k.pth
if [ -f "./assets/pretrained/G40k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking G40k.pth
if [ -f "./assets/pretrained_v2/G40k.pth" ]; then
echo G40k.pth in ./assets/pretrained_v2 checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d ./assets/pretrained_v2 -o G40k.pth
if [ -f "./assets/pretrained_v2/G40k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking G48k.pth
if [ -f "./assets/pretrained/G48k.pth" ]; then
echo G48k.pth in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d ./assets/pretrained -o G48k.pth
if [ -f "./assets/pretrained/G48k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $d32
if [ -f "./assets/pretrained/$d32" ]; then
echo $d32 in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld32 -d ./assets/pretrained -o $d32
if [ -f "./assets/pretrained/$d32" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $d40
if [ -f "./assets/pretrained/$d40" ]; then
echo $d40 in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld40 -d ./assets/pretrained -o $d40
if [ -f "./assets/pretrained/$d40" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $d40v2
if [ -f "./assets/pretrained_v2/$d40v2" ]; then
echo $d40v2 in ./assets/pretrained_v2 checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld40v2 -d ./assets/pretrained_v2 -o $d40v2
if [ -f "./assets/pretrained_v2/$d40v2" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $d48
if [ -f "./assets/pretrained/$d48" ]; then
echo $d48 in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld48 -d ./assets/pretrained -o $d48
if [ -f "./assets/pretrained/$d48" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $g32
if [ -f "./assets/pretrained/$g32" ]; then
echo $g32 in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg32 -d ./assets/pretrained -o $g32
if [ -f "./assets/pretrained/$g32" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $g40
if [ -f "./assets/pretrained/$g40" ]; then
echo $g40 in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg40 -d ./assets/pretrained -o $g40
if [ -f "./assets/pretrained/$g40" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $g40v2
if [ -f "./assets/pretrained_v2/$g40v2" ]; then
echo $g40v2 in ./assets/pretrained_v2 checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg40v2 -d ./assets/pretrained_v2 -o $g40v2
if [ -f "./assets/pretrained_v2/$g40v2" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $g48
if [ -f "./assets/pretrained/$g48" ]; then
echo $g48 in ./assets/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg48 -d ./assets/pretrained -o $g48
if [ -f "./assets/pretrained/$g48" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $hp2_all
if [ -f "./assets/uvr5_weights/$hp2_all" ]; then
echo $hp2_all in ./assets/uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhp2_all -d ./assets/uvr5_weights -o $hp2_all
if [ -f "./assets/uvr5_weights/$hp2_all" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $hp3_all
if [ -f "./assets/uvr5_weights/$hp3_all" ]; then
echo $hp3_all in ./assets/uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhp3_all -d ./assets/uvr5_weights -o $hp3_all
if [ -f "./assets/uvr5_weights/$hp3_all" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $hp5_only
if [ -f "./assets/uvr5_weights/$hp5_only" ]; then
echo $hp5_only in ./assets/uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhp5_only -d ./assets/uvr5_weights -o $hp5_only
if [ -f "./assets/uvr5_weights/$hp5_only" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $VR_DeEchoAggressive
if [ -f "./assets/uvr5_weights/$VR_DeEchoAggressive" ]; then
echo $VR_DeEchoAggressive in ./assets/uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlVR_DeEchoAggressive -d ./assets/uvr5_weights -o $VR_DeEchoAggressive
if [ -f "./assets/uvr5_weights/$VR_DeEchoAggressive" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $VR_DeEchoDeReverb
if [ -f "./assets/uvr5_weights/$VR_DeEchoDeReverb" ]; then
echo $VR_DeEchoDeReverb in ./assets/uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlVR_DeEchoDeReverb -d ./assets/uvr5_weights -o $VR_DeEchoDeReverb
if [ -f "./assets/uvr5_weights/$VR_DeEchoDeReverb" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $VR_DeEchoNormal
if [ -f "./assets/uvr5_weights/$VR_DeEchoNormal" ]; then
echo $VR_DeEchoNormal in ./assets/uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlVR_DeEchoNormal -d ./assets/uvr5_weights -o $VR_DeEchoNormal
if [ -f "./assets/uvr5_weights/$VR_DeEchoNormal" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $onnx_dereverb
if [ -f "./assets/uvr5_weights/onnx_dereverb_By_FoxJoy/$onnx_dereverb" ]; then
echo $onnx_dereverb in ./assets/uvr5_weights/onnx_dereverb_By_FoxJoy checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlonnx_dereverb -d ./assets/uvr5_weights/onnx_dereverb_By_FoxJoy -o $onnx_dereverb
if [ -f "./assets/uvr5_weights/onnx_dereverb_By_FoxJoy/$onnx_dereverb" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $rmvpe
if [ -f "./assets/rmvpe/$rmvpe" ]; then
echo $rmvpe in ./assets/rmvpe checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlrmvpe -d ./assets/rmvpe -o $rmvpe
if [ -f "./assets/rmvpe/$rmvpe" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $hb
if [ -f "./assets/hubert/$hb" ]; then
echo $hb in ./assets/hubert/pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhb -d ./assets/hubert/ -o $hb
if [ -f "./assets/hubert/$hb" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo required files check finished.
echo "required files check finished."

View File

@ -91,8 +91,12 @@ class RVC:
self.pth_path: str = pth_path
self.index_path = index_path
self.index_rate = index_rate
self.cache_pitch: np.ndarray = np.zeros(1024, dtype="int32")
self.cache_pitchf = np.zeros(1024, dtype="float32")
self.cache_pitch: torch.Tensor = torch.zeros(
1024, device=self.device, dtype=torch.long
)
self.cache_pitchf = torch.zeros(
1024, device=self.device, dtype=torch.float32
)
if last_rvc is None:
models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
@ -199,15 +203,17 @@ class RVC:
self.index_rate = new_index_rate
def get_f0_post(self, f0):
f0bak = f0.copy()
f0_mel = 1127 * np.log(1 + f0 / 700)
if not torch.is_tensor(f0):
f0 = torch.from_numpy(f0)
f0 = f0.float().to(self.device).squeeze()
f0_mel = 1127 * torch.log(1 + f0 / 700)
f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / (
self.f0_mel_max - self.f0_mel_min
) + 1
f0_mel[f0_mel <= 1] = 1
f0_mel[f0_mel > 255] = 255
f0_coarse = np.rint(f0_mel).astype(np.int32)
return f0_coarse, f0bak
f0_coarse = torch.round(f0_mel).long()
return f0_coarse, f0
def get_f0(self, x, f0_up_key, n_cpu, method="harvest"):
n_cpu = int(n_cpu)
@ -299,7 +305,6 @@ class RVC:
pd = torchcrepe.filter.median(pd, 3)
f0 = torchcrepe.filter.mean(f0, 3)
f0[pd < 0.1] = 0
f0 = f0[0].cpu().numpy()
f0 *= pow(2, f0_up_key / 12)
return self.get_f0_post(f0)
@ -335,7 +340,6 @@ class RVC:
threshold=0.006,
)
f0 *= pow(2, f0_up_key / 12)
f0 = f0.squeeze().cpu().numpy()
return self.get_f0_post(f0)
def infer(
@ -383,6 +387,7 @@ class RVC:
traceback.print_exc()
printt("Index search FAILED")
t3 = ttime()
p_len = input_wav.shape[0] // 160
if self.if_f0 == 1:
f0_extractor_frame = block_frame_16k + 800
if f0method == "rmvpe":
@ -390,25 +395,14 @@ class RVC:
pitch, pitchf = self.get_f0(
input_wav[-f0_extractor_frame:], self.f0_up_key, self.n_cpu, f0method
)
start_frame = block_frame_16k // 160
end_frame = len(self.cache_pitch) - (pitch.shape[0] - 4) + start_frame
self.cache_pitch[:] = np.append(
self.cache_pitch[start_frame:end_frame], pitch[3:-1]
)
self.cache_pitchf[:] = np.append(
self.cache_pitchf[start_frame:end_frame], pitchf[3:-1]
)
shift = block_frame_16k // 160
self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone()
self.cache_pitchf[:-shift] = self.cache_pitchf[shift:].clone()
self.cache_pitch[4 - pitch.shape[0] :] = pitch[3:-1]
self.cache_pitchf[4 - pitch.shape[0] :] = pitchf[3:-1]
cache_pitch = self.cache_pitch[None, -p_len:]
cache_pitchf = self.cache_pitchf[None, -p_len:]
t4 = ttime()
p_len = input_wav.shape[0] // 160
if self.if_f0 == 1:
cache_pitch = (
torch.LongTensor(self.cache_pitch[-p_len:]).to(self.device).unsqueeze(0)
)
cache_pitchf = (
torch.FloatTensor(self.cache_pitchf[-p_len:])
.to(self.device)
.unsqueeze(0)
)
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
feats = feats[:, :p_len, :]
p_len = torch.LongTensor([p_len]).to(self.device)