mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-05-06 20:01:37 +08:00
New feature of real-time voice changing: formant shift adjustment (#1999)
* add formant shift for realtime-gui
* chore(i18n): sync locale on dev
* chore(format): run black on dev
* fix

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
189eef5936
commit
0ab3a3296e
@ -1 +1 @@
|
|||||||
{"pth_path": "assets/weights/kikiV1.pth", "index_path": "logs/kikiV1.index", "sg_hostapi": "MME", "sg_wasapi_exclusive": false, "sg_input_device": "VoiceMeeter Output (VB-Audio Vo", "sg_output_device": "VoiceMeeter Input (VB-Audio Voi", "sr_type": "sr_device", "threhold": -60.0, "pitch": 12.0, "rms_mix_rate": 0.5, "index_rate": 0.0, "block_time": 0.15, "crossfade_length": 0.08, "extra_time": 2.0, "n_cpu": 4.0, "use_jit": false, "use_pv": false, "f0method": "fcpe"}
|
{"pth_path": "assets/weights/kikiV1.pth", "index_path": "logs/kikiV1.index", "sg_hostapi": "MME", "sg_wasapi_exclusive": false, "sg_input_device": "VoiceMeeter Output (VB-Audio Vo", "sg_output_device": "VoiceMeeter Input (VB-Audio Voi", "sr_type": "sr_device", "threhold": -60.0, "pitch": 12.0, "formant": 0.0, "rms_mix_rate": 0.5, "index_rate": 0.0, "block_time": 0.15, "crossfade_length": 0.08, "extra_time": 2.0, "n_cpu": 4.0, "use_jit": false, "use_pv": false, "f0method": "fcpe"}
|
20
gui_v1.py
20
gui_v1.py
@ -114,6 +114,7 @@ if __name__ == "__main__":
|
|||||||
self.pth_path: str = ""
|
self.pth_path: str = ""
|
||||||
self.index_path: str = ""
|
self.index_path: str = ""
|
||||||
self.pitch: int = 0
|
self.pitch: int = 0
|
||||||
|
self.formant: float = 0.0
|
||||||
self.sr_type: str = "sr_model"
|
self.sr_type: str = "sr_model"
|
||||||
self.block_time: float = 0.25 # s
|
self.block_time: float = 0.25 # s
|
||||||
self.threhold: int = -60
|
self.threhold: int = -60
|
||||||
@ -212,6 +213,7 @@ if __name__ == "__main__":
|
|||||||
"sr_type": "sr_model",
|
"sr_type": "sr_model",
|
||||||
"threhold": -60,
|
"threhold": -60,
|
||||||
"pitch": 0,
|
"pitch": 0,
|
||||||
|
"formant": 0.0,
|
||||||
"index_rate": 0,
|
"index_rate": 0,
|
||||||
"rms_mix_rate": 0,
|
"rms_mix_rate": 0,
|
||||||
"block_time": 0.25,
|
"block_time": 0.25,
|
||||||
@ -354,6 +356,17 @@ if __name__ == "__main__":
|
|||||||
enable_events=True,
|
enable_events=True,
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
|
[
|
||||||
|
sg.Text(i18n("共振偏移")),
|
||||||
|
sg.Slider(
|
||||||
|
range=(-5, 5),
|
||||||
|
key="formant",
|
||||||
|
resolution=0.01,
|
||||||
|
orientation="h",
|
||||||
|
default_value=data.get("formant", 0.0),
|
||||||
|
enable_events=True,
|
||||||
|
),
|
||||||
|
],
|
||||||
[
|
[
|
||||||
sg.Text(i18n("Index Rate")),
|
sg.Text(i18n("Index Rate")),
|
||||||
sg.Slider(
|
sg.Slider(
|
||||||
@ -579,6 +592,7 @@ if __name__ == "__main__":
|
|||||||
],
|
],
|
||||||
"threhold": values["threhold"],
|
"threhold": values["threhold"],
|
||||||
"pitch": values["pitch"],
|
"pitch": values["pitch"],
|
||||||
|
"formant": values["formant"],
|
||||||
"rms_mix_rate": values["rms_mix_rate"],
|
"rms_mix_rate": values["rms_mix_rate"],
|
||||||
"index_rate": values["index_rate"],
|
"index_rate": values["index_rate"],
|
||||||
# "device_latency": values["device_latency"],
|
# "device_latency": values["device_latency"],
|
||||||
@ -621,6 +635,10 @@ if __name__ == "__main__":
|
|||||||
self.gui_config.pitch = values["pitch"]
|
self.gui_config.pitch = values["pitch"]
|
||||||
if hasattr(self, "rvc"):
|
if hasattr(self, "rvc"):
|
||||||
self.rvc.change_key(values["pitch"])
|
self.rvc.change_key(values["pitch"])
|
||||||
|
elif event == "formant":
|
||||||
|
self.gui_config.formant = values["formant"]
|
||||||
|
if hasattr(self, "rvc"):
|
||||||
|
self.rvc.change_formant(values["formant"])
|
||||||
elif event == "index_rate":
|
elif event == "index_rate":
|
||||||
self.gui_config.index_rate = values["index_rate"]
|
self.gui_config.index_rate = values["index_rate"]
|
||||||
if hasattr(self, "rvc"):
|
if hasattr(self, "rvc"):
|
||||||
@ -679,6 +697,7 @@ if __name__ == "__main__":
|
|||||||
]
|
]
|
||||||
self.gui_config.threhold = values["threhold"]
|
self.gui_config.threhold = values["threhold"]
|
||||||
self.gui_config.pitch = values["pitch"]
|
self.gui_config.pitch = values["pitch"]
|
||||||
|
self.gui_config.formant = values["formant"]
|
||||||
self.gui_config.block_time = values["block_time"]
|
self.gui_config.block_time = values["block_time"]
|
||||||
self.gui_config.crossfade_time = values["crossfade_length"]
|
self.gui_config.crossfade_time = values["crossfade_length"]
|
||||||
self.gui_config.extra_time = values["extra_time"]
|
self.gui_config.extra_time = values["extra_time"]
|
||||||
@ -703,6 +722,7 @@ if __name__ == "__main__":
|
|||||||
torch.cuda.empty_cache()
|
torch.cuda.empty_cache()
|
||||||
self.rvc = rtrvc.RVC(
|
self.rvc = rtrvc.RVC(
|
||||||
self.gui_config.pitch,
|
self.gui_config.pitch,
|
||||||
|
self.gui_config.formant,
|
||||||
self.gui_config.pth_path,
|
self.gui_config.pth_path,
|
||||||
self.gui_config.index_path,
|
self.gui_config.index_path,
|
||||||
self.gui_config.index_rate,
|
self.gui_config.index_rate,
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modify model information (only supported for small model files extracted from the 'weights' folder)",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modify model information (only supported for small model files extracted from the 'weights' folder)",
|
||||||
"停止音频转换": "Stop audio conversion",
|
"停止音频转换": "Stop audio conversion",
|
||||||
"全流程结束!": "All processes have been completed!",
|
"全流程结束!": "All processes have been completed!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "Refresh voice list and index path",
|
"刷新音色列表和索引路径": "Refresh voice list and index path",
|
||||||
"加载模型": "Load model",
|
"加载模型": "Load model",
|
||||||
"加载预训练底模D路径": "Load pre-trained base model D path:",
|
"加载预训练底模D路径": "Load pre-trained base model D path:",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modificar la información del modelo (solo admite archivos de modelos pequeños extraídos en la carpeta weights)",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modificar la información del modelo (solo admite archivos de modelos pequeños extraídos en la carpeta weights)",
|
||||||
"停止音频转换": "Detener la conversión de audio",
|
"停止音频转换": "Detener la conversión de audio",
|
||||||
"全流程结束!": "¡Todo el proceso ha terminado!",
|
"全流程结束!": "¡Todo el proceso ha terminado!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "Actualizar la lista de modelos e índice de rutas",
|
"刷新音色列表和索引路径": "Actualizar la lista de modelos e índice de rutas",
|
||||||
"加载模型": "Cargar modelo",
|
"加载模型": "Cargar modelo",
|
||||||
"加载预训练底模D路径": "Cargue la ruta del modelo D base pre-entrenada.",
|
"加载预训练底模D路径": "Cargue la ruta del modelo D base pre-entrenada.",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modifier les informations du modèle (uniquement pris en charge pour les petits fichiers de modèle extraits du dossier 'weights')",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modifier les informations du modèle (uniquement pris en charge pour les petits fichiers de modèle extraits du dossier 'weights')",
|
||||||
"停止音频转换": "Arrêter la conversion audio",
|
"停止音频转换": "Arrêter la conversion audio",
|
||||||
"全流程结束!": "Toutes les étapes ont été terminées !",
|
"全流程结束!": "Toutes les étapes ont été terminées !",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "Actualiser la liste des voix et le vers l'index.",
|
"刷新音色列表和索引路径": "Actualiser la liste des voix et le vers l'index.",
|
||||||
"加载模型": "Charger le modèle.",
|
"加载模型": "Charger le modèle.",
|
||||||
"加载预训练底模D路径": "Charger le chemin du modèle de base pré-entraîné D :",
|
"加载预训练底模D路径": "Charger le chemin du modèle de base pré-entraîné D :",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modifica le informazioni sul modello (supportato solo per i file di modello di piccole dimensioni estratti dalla cartella 'weights')",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modifica le informazioni sul modello (supportato solo per i file di modello di piccole dimensioni estratti dalla cartella 'weights')",
|
||||||
"停止音频转换": "Arresta la conversione audio",
|
"停止音频转换": "Arresta la conversione audio",
|
||||||
"全流程结束!": "Tutti i processi sono stati completati!",
|
"全流程结束!": "Tutti i processi sono stati completati!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "Aggiorna l'elenco delle voci e il percorso dell'indice",
|
"刷新音色列表和索引路径": "Aggiorna l'elenco delle voci e il percorso dell'indice",
|
||||||
"加载模型": "Carica modello",
|
"加载模型": "Carica modello",
|
||||||
"加载预训练底模D路径": "Carica il percorso D del modello base pre-addestrato:",
|
"加载预训练底模D路径": "Carica il percorso D del modello base pre-addestrato:",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "モデル情報の修正(weightsフォルダから抽出された小さなモデルファイルのみ対応)",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "モデル情報の修正(weightsフォルダから抽出された小さなモデルファイルのみ対応)",
|
||||||
"停止音频转换": "音声変換を停止",
|
"停止音频转换": "音声変換を停止",
|
||||||
"全流程结束!": "全工程が完了!",
|
"全流程结束!": "全工程が完了!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "音源リストとインデックスパスの更新",
|
"刷新音色列表和索引路径": "音源リストとインデックスパスの更新",
|
||||||
"加载模型": "モデルをロード",
|
"加载模型": "モデルをロード",
|
||||||
"加载预训练底模D路径": "事前学習済みのDモデルのパス",
|
"加载预训练底模D路径": "事前学習済みのDモデルのパス",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "모델 정보 수정(오직 weights 폴더 아래에서 추출된 작은 모델 파일만 지원)",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "모델 정보 수정(오직 weights 폴더 아래에서 추출된 작은 모델 파일만 지원)",
|
||||||
"停止音频转换": "오디오 변환 중지",
|
"停止音频转换": "오디오 변환 중지",
|
||||||
"全流程结束!": "전체 과정 완료!",
|
"全流程结束!": "전체 과정 완료!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "음색 목록 및 인덱스 경로 새로고침",
|
"刷新音色列表和索引路径": "음색 목록 및 인덱스 경로 새로고침",
|
||||||
"加载模型": "모델 로드",
|
"加载模型": "모델 로드",
|
||||||
"加载预训练底模D路径": "미리 훈련된 베이스 모델 D 경로 로드",
|
"加载预训练底模D路径": "미리 훈련된 베이스 모델 D 경로 로드",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modificar informações do modelo (suportado apenas para arquivos de modelo pequenos extraídos da pasta 'weights')",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modificar informações do modelo (suportado apenas para arquivos de modelo pequenos extraídos da pasta 'weights')",
|
||||||
"停止音频转换": "Conversão de áudio",
|
"停止音频转换": "Conversão de áudio",
|
||||||
"全流程结束!": "Todos os processos foram concluídos!",
|
"全流程结束!": "Todos os processos foram concluídos!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "Atualizar lista de voz e caminho do Index",
|
"刷新音色列表和索引路径": "Atualizar lista de voz e caminho do Index",
|
||||||
"加载模型": "Modelo",
|
"加载模型": "Modelo",
|
||||||
"加载预训练底模D路径": "Carregue o caminho D do modelo base pré-treinado:",
|
"加载预训练底模D路径": "Carregue o caminho D do modelo base pré-treinado:",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Изменить информацию о модели (работает только с маленькими моделями, взятыми из папки 'weights')",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Изменить информацию о модели (работает только с маленькими моделями, взятыми из папки 'weights')",
|
||||||
"停止音频转换": "Закончить конвертацию аудио",
|
"停止音频转换": "Закончить конвертацию аудио",
|
||||||
"全流程结束!": "Все процессы завершены!",
|
"全流程结束!": "Все процессы завершены!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "Обновить список голосов и индексов",
|
"刷新音色列表和索引路径": "Обновить список голосов и индексов",
|
||||||
"加载模型": "Загрузить модель",
|
"加载模型": "Загрузить модель",
|
||||||
"加载预训练底模D路径": "Путь к предварительно обученной базовой модели D:",
|
"加载预训练底模D路径": "Путь к предварительно обученной базовой модели D:",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Model bilgilerini düzenle (sadece 'weights' klasöründen çıkarılan küçük model dosyaları desteklenir)",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Model bilgilerini düzenle (sadece 'weights' klasöründen çıkarılan küçük model dosyaları desteklenir)",
|
||||||
"停止音频转换": "Ses dönüştürmeyi durdur",
|
"停止音频转换": "Ses dönüştürmeyi durdur",
|
||||||
"全流程结束!": "Tüm işlemler tamamlandı!",
|
"全流程结束!": "Tüm işlemler tamamlandı!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "Ses listesini ve indeks yolunu yenile",
|
"刷新音色列表和索引路径": "Ses listesini ve indeks yolunu yenile",
|
||||||
"加载模型": "Model yükle",
|
"加载模型": "Model yükle",
|
||||||
"加载预训练底模D路径": "Önceden eğitilmiş temel D modelini yükleme yolu:",
|
"加载预训练底模D路径": "Önceden eğitilmiş temel D modelini yükleme yolu:",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型信息(仅支持weights文件夹下提取的小模型文件)",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型信息(仅支持weights文件夹下提取的小模型文件)",
|
||||||
"停止音频转换": "停止音频转换",
|
"停止音频转换": "停止音频转换",
|
||||||
"全流程结束!": "全流程结束!",
|
"全流程结束!": "全流程结束!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "刷新音色列表和索引路径",
|
"刷新音色列表和索引路径": "刷新音色列表和索引路径",
|
||||||
"加载模型": "加载模型",
|
"加载模型": "加载模型",
|
||||||
"加载预训练底模D路径": "加载预训练底模D路径",
|
"加载预训练底模D路径": "加载预训练底模D路径",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型資訊(僅支援weights資料夾下提取的小模型檔案)",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型資訊(僅支援weights資料夾下提取的小模型檔案)",
|
||||||
"停止音频转换": "停止音訊轉換",
|
"停止音频转换": "停止音訊轉換",
|
||||||
"全流程结束!": "全流程结束!",
|
"全流程结束!": "全流程结束!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "刷新音色列表和索引路徑",
|
"刷新音色列表和索引路径": "刷新音色列表和索引路徑",
|
||||||
"加载模型": "載入模型",
|
"加载模型": "載入模型",
|
||||||
"加载预训练底模D路径": "加載預訓練底模D路徑",
|
"加载预训练底模D路径": "加載預訓練底模D路徑",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型資訊(僅支援weights資料夾下提取的小模型檔案)",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型資訊(僅支援weights資料夾下提取的小模型檔案)",
|
||||||
"停止音频转换": "停止音訊轉換",
|
"停止音频转换": "停止音訊轉換",
|
||||||
"全流程结束!": "全流程结束!",
|
"全流程结束!": "全流程结束!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "刷新音色列表和索引路徑",
|
"刷新音色列表和索引路径": "刷新音色列表和索引路徑",
|
||||||
"加载模型": "載入模型",
|
"加载模型": "載入模型",
|
||||||
"加载预训练底模D路径": "加載預訓練底模D路徑",
|
"加载预训练底模D路径": "加載預訓練底模D路徑",
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型資訊(僅支援weights資料夾下提取的小模型檔案)",
|
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "修改模型資訊(僅支援weights資料夾下提取的小模型檔案)",
|
||||||
"停止音频转换": "停止音訊轉換",
|
"停止音频转换": "停止音訊轉換",
|
||||||
"全流程结束!": "全流程结束!",
|
"全流程结束!": "全流程结束!",
|
||||||
|
"共振偏移": "共振偏移",
|
||||||
"刷新音色列表和索引路径": "刷新音色列表和索引路徑",
|
"刷新音色列表和索引路径": "刷新音色列表和索引路徑",
|
||||||
"加载模型": "載入模型",
|
"加载模型": "載入模型",
|
||||||
"加载预训练底模D路径": "加載預訓練底模D路徑",
|
"加载预训练底模D路径": "加載預訓練底模D路徑",
|
||||||
|
@ -10,7 +10,6 @@ from torch import nn
|
|||||||
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
|
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
|
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
|
||||||
|
|
||||||
from infer.lib.infer_pack import attentions, commons, modules
|
from infer.lib.infer_pack import attentions, commons, modules
|
||||||
from infer.lib.infer_pack.commons import get_padding, init_weights
|
from infer.lib.infer_pack.commons import get_padding, init_weights
|
||||||
|
|
||||||
@ -250,7 +249,17 @@ class Generator(torch.nn.Module):
|
|||||||
if gin_channels != 0:
|
if gin_channels != 0:
|
||||||
self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1)
|
self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1)
|
||||||
|
|
||||||
def forward(self, x: torch.Tensor, g: Optional[torch.Tensor] = None):
|
def forward(
|
||||||
|
self,
|
||||||
|
x: torch.Tensor,
|
||||||
|
g: Optional[torch.Tensor] = None,
|
||||||
|
n_res: Optional[torch.Tensor] = None,
|
||||||
|
):
|
||||||
|
if n_res is not None:
|
||||||
|
assert isinstance(n_res, torch.Tensor)
|
||||||
|
n = int(n_res.item())
|
||||||
|
if n != x.shape[-1]:
|
||||||
|
x = F.interpolate(x, size=n, mode="linear")
|
||||||
x = self.conv_pre(x)
|
x = self.conv_pre(x)
|
||||||
if g is not None:
|
if g is not None:
|
||||||
x = x + self.cond(g)
|
x = x + self.cond(g)
|
||||||
@ -529,9 +538,22 @@ class GeneratorNSF(torch.nn.Module):
|
|||||||
|
|
||||||
self.lrelu_slope = modules.LRELU_SLOPE
|
self.lrelu_slope = modules.LRELU_SLOPE
|
||||||
|
|
||||||
def forward(self, x, f0, g: Optional[torch.Tensor] = None):
|
def forward(
|
||||||
|
self,
|
||||||
|
x,
|
||||||
|
f0,
|
||||||
|
g: Optional[torch.Tensor] = None,
|
||||||
|
n_res: Optional[torch.Tensor] = None,
|
||||||
|
):
|
||||||
har_source, noi_source, uv = self.m_source(f0, self.upp)
|
har_source, noi_source, uv = self.m_source(f0, self.upp)
|
||||||
har_source = har_source.transpose(1, 2)
|
har_source = har_source.transpose(1, 2)
|
||||||
|
if n_res is not None:
|
||||||
|
assert isinstance(n_res, torch.Tensor)
|
||||||
|
n = int(n_res.item())
|
||||||
|
if n * self.upp != har_source.shape[-1]:
|
||||||
|
har_source = F.interpolate(har_source, size=n * self.upp, mode="linear")
|
||||||
|
if n != x.shape[-1]:
|
||||||
|
x = F.interpolate(x, size=n, mode="linear")
|
||||||
x = self.conv_pre(x)
|
x = self.conv_pre(x)
|
||||||
if g is not None:
|
if g is not None:
|
||||||
x = x + self.cond(g)
|
x = x + self.cond(g)
|
||||||
@ -558,6 +580,7 @@ class GeneratorNSF(torch.nn.Module):
|
|||||||
x = F.leaky_relu(x)
|
x = F.leaky_relu(x)
|
||||||
x = self.conv_post(x)
|
x = self.conv_post(x)
|
||||||
x = torch.tanh(x)
|
x = torch.tanh(x)
|
||||||
|
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def remove_weight_norm(self):
|
def remove_weight_norm(self):
|
||||||
@ -748,6 +771,7 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
|
|||||||
sid: torch.Tensor,
|
sid: torch.Tensor,
|
||||||
skip_head: Optional[torch.Tensor] = None,
|
skip_head: Optional[torch.Tensor] = None,
|
||||||
return_length: Optional[torch.Tensor] = None,
|
return_length: Optional[torch.Tensor] = None,
|
||||||
|
return_length2: Optional[torch.Tensor] = None,
|
||||||
):
|
):
|
||||||
g = self.emb_g(sid).unsqueeze(-1)
|
g = self.emb_g(sid).unsqueeze(-1)
|
||||||
if skip_head is not None and return_length is not None:
|
if skip_head is not None and return_length is not None:
|
||||||
@ -767,7 +791,7 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
|
|||||||
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
|
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
|
||||||
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
||||||
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
||||||
o = self.dec(z * x_mask, nsff0, g=g)
|
o = self.dec(z * x_mask, nsff0, g=g, n_res=return_length2)
|
||||||
return o, x_mask, (z, z_p, m_p, logs_p)
|
return o, x_mask, (z, z_p, m_p, logs_p)
|
||||||
|
|
||||||
|
|
||||||
@ -963,6 +987,7 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
|
|||||||
sid: torch.Tensor,
|
sid: torch.Tensor,
|
||||||
skip_head: Optional[torch.Tensor] = None,
|
skip_head: Optional[torch.Tensor] = None,
|
||||||
return_length: Optional[torch.Tensor] = None,
|
return_length: Optional[torch.Tensor] = None,
|
||||||
|
return_length2: Optional[torch.Tensor] = None,
|
||||||
):
|
):
|
||||||
g = self.emb_g(sid).unsqueeze(-1)
|
g = self.emb_g(sid).unsqueeze(-1)
|
||||||
if skip_head is not None and return_length is not None:
|
if skip_head is not None and return_length is not None:
|
||||||
@ -981,7 +1006,7 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
|
|||||||
m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
|
m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
|
||||||
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
|
||||||
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
z = self.flow(z_p, x_mask, g=g, reverse=True)
|
||||||
o = self.dec(z * x_mask, g=g)
|
o = self.dec(z * x_mask, g=g, n_res=return_length2)
|
||||||
return o, x_mask, (z, z_p, m_p, logs_p)
|
return o, x_mask, (z, z_p, m_p, logs_p)
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ import torch
|
|||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import torchcrepe
|
import torchcrepe
|
||||||
|
from torchaudio.transforms import Resample
|
||||||
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
@ -40,6 +41,7 @@ class RVC:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
key,
|
key,
|
||||||
|
formant,
|
||||||
pth_path,
|
pth_path,
|
||||||
index_path,
|
index_path,
|
||||||
index_rate,
|
index_rate,
|
||||||
@ -68,6 +70,7 @@ class RVC:
|
|||||||
# device="cpu"########强制cpu测试
|
# device="cpu"########强制cpu测试
|
||||||
self.device = config.device
|
self.device = config.device
|
||||||
self.f0_up_key = key
|
self.f0_up_key = key
|
||||||
|
self.formant_shift = formant
|
||||||
self.f0_min = 50
|
self.f0_min = 50
|
||||||
self.f0_max = 1100
|
self.f0_max = 1100
|
||||||
self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
|
self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
|
||||||
@ -90,6 +93,8 @@ class RVC:
|
|||||||
1024, device=self.device, dtype=torch.float32
|
1024, device=self.device, dtype=torch.float32
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.resample_kernel = {}
|
||||||
|
|
||||||
if last_rvc is None:
|
if last_rvc is None:
|
||||||
models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
|
models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
|
||||||
["assets/hubert/hubert_base.pt"],
|
["assets/hubert/hubert_base.pt"],
|
||||||
@ -187,6 +192,9 @@ class RVC:
|
|||||||
def change_key(self, new_key):
|
def change_key(self, new_key):
|
||||||
self.f0_up_key = new_key
|
self.f0_up_key = new_key
|
||||||
|
|
||||||
|
def change_formant(self, new_formant):
|
||||||
|
self.formant_shift = new_formant
|
||||||
|
|
||||||
def change_index_rate(self, new_index_rate):
|
def change_index_rate(self, new_index_rate):
|
||||||
if new_index_rate != 0 and self.index_rate == 0:
|
if new_index_rate != 0 and self.index_rate == 0:
|
||||||
self.index = faiss.read_index(self.index_path)
|
self.index = faiss.read_index(self.index_path)
|
||||||
@ -390,12 +398,14 @@ class RVC:
|
|||||||
printt("Index search FAILED")
|
printt("Index search FAILED")
|
||||||
t3 = ttime()
|
t3 = ttime()
|
||||||
p_len = input_wav.shape[0] // 160
|
p_len = input_wav.shape[0] // 160
|
||||||
|
factor = pow(2, self.formant_shift / 12)
|
||||||
|
return_length2 = int(np.ceil(return_length * factor))
|
||||||
if self.if_f0 == 1:
|
if self.if_f0 == 1:
|
||||||
f0_extractor_frame = block_frame_16k + 800
|
f0_extractor_frame = block_frame_16k + 800
|
||||||
if f0method == "rmvpe":
|
if f0method == "rmvpe":
|
||||||
f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1) - 160
|
f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1) - 160
|
||||||
pitch, pitchf = self.get_f0(
|
pitch, pitchf = self.get_f0(
|
||||||
input_wav[-f0_extractor_frame:], self.f0_up_key, self.n_cpu, f0method
|
input_wav[-f0_extractor_frame:], self.f0_up_key - self.formant_shift, self.n_cpu, f0method
|
||||||
)
|
)
|
||||||
shift = block_frame_16k // 160
|
shift = block_frame_16k // 160
|
||||||
self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone()
|
self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone()
|
||||||
@ -403,13 +413,14 @@ class RVC:
|
|||||||
self.cache_pitch[4 - pitch.shape[0] :] = pitch[3:-1]
|
self.cache_pitch[4 - pitch.shape[0] :] = pitch[3:-1]
|
||||||
self.cache_pitchf[4 - pitch.shape[0] :] = pitchf[3:-1]
|
self.cache_pitchf[4 - pitch.shape[0] :] = pitchf[3:-1]
|
||||||
cache_pitch = self.cache_pitch[None, -p_len:]
|
cache_pitch = self.cache_pitch[None, -p_len:]
|
||||||
cache_pitchf = self.cache_pitchf[None, -p_len:]
|
cache_pitchf = self.cache_pitchf[None, -p_len:] * return_length2 / return_length
|
||||||
t4 = ttime()
|
t4 = ttime()
|
||||||
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
|
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
|
||||||
feats = feats[:, :p_len, :]
|
feats = feats[:, :p_len, :]
|
||||||
p_len = torch.LongTensor([p_len]).to(self.device)
|
p_len = torch.LongTensor([p_len]).to(self.device)
|
||||||
sid = torch.LongTensor([0]).to(self.device)
|
sid = torch.LongTensor([0]).to(self.device)
|
||||||
skip_head = torch.LongTensor([skip_head])
|
skip_head = torch.LongTensor([skip_head])
|
||||||
|
return_length2 = torch.LongTensor([return_length2])
|
||||||
return_length = torch.LongTensor([return_length])
|
return_length = torch.LongTensor([return_length])
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
if self.if_f0 == 1:
|
if self.if_f0 == 1:
|
||||||
@ -421,10 +432,23 @@ class RVC:
|
|||||||
sid,
|
sid,
|
||||||
skip_head,
|
skip_head,
|
||||||
return_length,
|
return_length,
|
||||||
|
return_length2,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
infered_audio, _, _ = self.net_g.infer(
|
infered_audio, _, _ = self.net_g.infer(
|
||||||
feats, p_len, sid, skip_head, return_length
|
feats, p_len, sid, skip_head, return_length, return_length2
|
||||||
|
)
|
||||||
|
infered_audio = infered_audio.squeeze(1).float()
|
||||||
|
upp_res = int(np.floor(factor * self.tgt_sr // 100))
|
||||||
|
if upp_res != self.tgt_sr // 100:
|
||||||
|
if upp_res not in self.resample_kernel:
|
||||||
|
self.resample_kernel[upp_res] = Resample(
|
||||||
|
orig_freq=upp_res,
|
||||||
|
new_freq=self.tgt_sr // 100,
|
||||||
|
dtype=torch.float32,
|
||||||
|
).to(self.device)
|
||||||
|
infered_audio = self.resample_kernel[upp_res](
|
||||||
|
infered_audio[:, : return_length * upp_res]
|
||||||
)
|
)
|
||||||
t5 = ttime()
|
t5 = ttime()
|
||||||
printt(
|
printt(
|
||||||
@ -434,4 +458,4 @@ class RVC:
|
|||||||
t4 - t3,
|
t4 - t3,
|
||||||
t5 - t4,
|
t5 - t4,
|
||||||
)
|
)
|
||||||
return infered_audio.squeeze().float()
|
return infered_audio.squeeze()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user