Linter and ui splitting

VSlobolinskyi 2025-03-20 01:25:30 +02:00
parent ae0863285b
commit 7a1d1027ea
26 changed files with 2314 additions and 1419 deletions

198 .gitignore vendored

@@ -1,28 +1,194 @@
.DS_Store
__pycache__
/TEMP
*.pyd
.venv
/opt
tools/aria2c/
tools/flag.txt
# ========= Common (applies to the entire repository) =========
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# Imported from huggingface.co/lj1995/VoiceConversionWebUI
# Virtual environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# IDE and editor settings
# PyCharm
.idea/
# VSCode
.vscode/
# Jupyter Notebook
.ipynb_checkpoints
# Others
*.log
*.spec
*.manifest
# ========= Exclusions specific to the RVC Inference Module =========
# Directories generated by RVC or for runtime usage:
/TEMP
/opt
/tools/aria2c/
tools/flag.txt
/pretrained
/pretrained_v2
/uvr5_weights
hubert_base.pt
rmvpe.onnx
rmvpe.pt
# Generated by RVC
rmvpe.onnx
/logs
/weights
# To set a Python version for the project
.tool-versions
/runtime
/assets/weights/*
ffmpeg.*
ffprobe.*
# ========= Exclusions for the Spark Repository =========
# (Since Spark files will be moved under ./spark, prefix these rules with "spark/")
# Byte-compiled / optimized / DLL files in spark
spark/__pycache__/
spark/*.py[cod]
spark/*$py.class
# Directories and files generated in Spark
spark/pretrained_models/
spark/results/
spark/demo/
spark/.gradio/
# Distribution/packaging for Spark
spark/.Python
spark/build/
spark/develop-eggs/
spark/dist/
spark/downloads/
spark/eggs/
spark/.eggs/
spark/lib/
spark/lib64/
spark/parts/
spark/sdist/
spark/var/
spark/wheels/
spark/share/python-wheels/
spark/*.egg-info/
spark/.installed.cfg
spark/*.egg
spark/MANIFEST
spark/webui_test.py
# PyInstaller (for Spark)
spark/*.manifest
spark/*.spec
# Installer logs for Spark
spark/pip-log.txt
spark/pip-delete-this-directory.txt
# Unit test / coverage reports for Spark
spark/htmlcov/
spark/.tox/
spark/.nox/
spark/.coverage
spark/.coverage.*
spark/.cache
spark/nosetests.xml
spark/coverage.xml
spark/*.cover
spark/*.py,cover
spark/.hypothesis/
spark/.pytest_cache/
spark/cover/
# Translations (Spark)
spark/*.mo
spark/*.pot
# Django/Flask/other web framework logs for Spark (if any)
spark/*.log
spark/local_settings.py
spark/db.sqlite3
spark/db.sqlite3-journal
# Flask and Scrapy caches for Spark
spark/instance/
spark/.webassets-cache
spark/.scrapy
# Sphinx documentation build for Spark
spark/docs/_build/
# PyBuilder / PEP582 for Spark
spark/.pybuilder/
spark/target/
spark/__pypackages__/
# Celery / SageMath for Spark
spark/celerybeat-schedule
spark/celerybeat.pid
spark/*.sage.py
# IDE settings for Spark (if desired)
spark/.idea/
# MkDocs for Spark
spark/site/
# Type checker caches for Spark
spark/.mypy_cache/
spark/.dmypy.json
spark/dmypy.json
spark/.pyre/
spark/.pytype/
# Cython debug symbols for Spark
spark/cython_debug/
# PyPI configuration for Spark
spark/.pypirc

configure_gpu_deps.py

@@ -2,6 +2,7 @@
import sys
import re
def process_lines(lines, target_gpu):
"""
Process lines from the pyproject.toml file.
@@ -12,9 +13,9 @@ def process_lines(lines, target_gpu):
current_block = None # None, "nvidia", or "amd"
# Regex patterns for block markers and separator lines.
nvidia_marker = re.compile(r'---\s*NVIDIA GPU configuration\s*---', re.IGNORECASE)
amd_marker = re.compile(r'---\s*AMD GPU configuration\s*---', re.IGNORECASE)
separator = re.compile(r'^#\s*-{5,}') # a commented separator line
nvidia_marker = re.compile(r"---\s*NVIDIA GPU configuration\s*---", re.IGNORECASE)
amd_marker = re.compile(r"---\s*AMD GPU configuration\s*---", re.IGNORECASE)
separator = re.compile(r"^#\s*-{5,}") # a commented separator line
for line in lines:
# Check if this line marks the beginning of a GPU config block.
@@ -48,7 +49,7 @@ def process_lines(lines, target_gpu):
output_lines.append(line)
else:
# Remove the first occurrence of '#' with following space.
uncommented = re.sub(r'^(\s*)#\s?', r'\1', line)
uncommented = re.sub(r"^(\s*)#\s?", r"\1", line)
output_lines.append(uncommented)
else:
# For the non-target block, ensure the line is commented.
@@ -57,13 +58,14 @@ def process_lines(lines, target_gpu):
output_lines.append(line)
else:
# Add a '#' preserving the original indentation.
leading_space = re.match(r'^(\s*)', line).group(1)
leading_space = re.match(r"^(\s*)", line).group(1)
output_lines.append(f"{leading_space}# {line.lstrip()}")
else:
# Outside of any GPU config block, just add the line.
output_lines.append(line)
return output_lines
def main():
if len(sys.argv) != 3:
print("Usage: python configure_gpu_deps.py <pyproject.toml> <gpu_type>")
@@ -88,5 +90,6 @@ def main():
print(f"Updated {toml_path} for {gpu_type.upper()} GPU configuration.")
if __name__ == "__main__":
main()
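To make the block toggling concrete, here is a self-contained sketch of the logic the script applies (the regexes are the ones from the diff; the TOML lines and the torch pin are hypothetical):

import re

NVIDIA = re.compile(r"---\s*NVIDIA GPU configuration\s*---", re.IGNORECASE)
AMD = re.compile(r"---\s*AMD GPU configuration\s*---", re.IGNORECASE)
SEPARATOR = re.compile(r"^#\s*-{5,}")

def toggle(lines, target):
    out, block = [], None
    for line in lines:
        if NVIDIA.search(line):
            block = "nvidia"  # marker lines are checked before the separator
        elif AMD.search(line):
            block = "amd"
        elif block and SEPARATOR.match(line):
            block = None  # a bare separator closes the current block
        elif block == target:
            line = re.sub(r"^(\s*)#\s?", r"\1", line)  # uncomment the target block
        elif block is not None:
            indent = re.match(r"^(\s*)", line).group(1)
            if not line.lstrip().startswith("#"):
                line = f"{indent}# {line.lstrip()}"  # comment out the other block
        out.append(line)
    return out

demo = [
    "# ----- NVIDIA GPU configuration -----",
    '# torch = { version = "2.1.0", source = "pytorch-cuda" }',  # hypothetical pin
    "# -------------------------------------",
]
print("\n".join(toggle(demo, "nvidia")))  # the torch line comes back uncommented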

36 generate-structure.js Normal file

@@ -0,0 +1,36 @@
// generate-structure.js
const fs = require("fs");
const path = require("path");
const targetDir = path.join(__dirname, "./");
const outputFile = path.join(__dirname, "struct.txt");
function generateStructure(dir, indent = "") {
let structure = "";
// Read directory items
const items = fs.readdirSync(dir);
items.forEach((item, index) => {
// Skip .git directories
if (item === ".git") return;
const itemPath = path.join(dir, item);
const stats = fs.statSync(itemPath);
// Use pointer symbols to show tree structure
const pointer = index === items.length - 1 ? "└── " : "├── ";
structure += indent + pointer + item + "\n";
if (stats.isDirectory()) {
// Increase indent: if last, add space, otherwise add vertical line
const extension = index === items.length - 1 ? " " : "│ ";
structure += generateStructure(itemPath, indent + extension);
}
});
return structure;
}
try {
const structureText = generateStructure(targetDir);
fs.writeFileSync(outputFile, structureText, "utf-8");
console.log(`Folder structure saved to ${outputFile}`);
} catch (error) {
console.error("Error generating folder structure:", error);
}
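Running node generate-structure.js from the repository root writes the tree to struct.txt; the struct.txt added later in this commit is that output.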

infer-web.py

@@ -1,761 +1,29 @@
import os
import sys
from dotenv import load_dotenv
now_dir = os.getcwd()
sys.path.append(now_dir)
load_dotenv()
from infer.modules.vc.modules import VC
from configs.config import Config
import torch
#!/usr/bin/env python3
import gradio as gr
import fairseq
from time import sleep
from subprocess import Popen
import warnings
import traceback
import threading
import shutil
import logging
from rvc_ui.initialization import now_dir, config, vc
from rvc_ui.main import build_rvc_ui
# from spark_ui.main import build_spark_ui
def build_unified_ui():
# Build each sub-UI
rvc_ui = build_rvc_ui() # Returns a gr.Blocks instance for RVC WebUI
# spark_ui = build_spark_ui() # Returns a gr.Blocks instance for Spark TTS
logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)
os.environ["TEMP"] = tmp
warnings.filterwarnings("ignore")
torch.manual_seed(114514)
config = Config()
vc = VC(config)
if config.dml == True:
def forward_dml(ctx, x, scale):
ctx.scale = scale
res = x.clone().detach()
return res
fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
ngpu = torch.cuda.device_count()
gpu_infos = []
mem = []
if_gpu_ok = False
if torch.cuda.is_available() or ngpu != 0:
for i in range(ngpu):
gpu_name = torch.cuda.get_device_name(i)
if any(
value in gpu_name.upper()
for value in [
"10",
"16",
"20",
"30",
"40",
"A2",
"A3",
"A4",
"P4",
"A50",
"500",
"A60",
"70",
"80",
"90",
"M4",
"T4",
"TITAN",
"4060",
"L",
"6000",
]
):
# A10#A100#V100#A40#P40#M40#K80#A4500
if_gpu_ok = True # at least one usable NVIDIA GPU
gpu_infos.append("%s\t%s" % (i, gpu_name))
mem.append(
int(
torch.cuda.get_device_properties(i).total_memory
/ 1024
/ 1024
/ 1024
+ 0.4
)
)
if if_gpu_ok and len(gpu_infos) > 0:
gpu_info = "\n".join(gpu_infos)
default_batch_size = min(mem) // 2
else:
gpu_info = "Unfortunately, there is no compatible GPU available to support your training."
default_batch_size = 1
gpus = "-".join([i[0] for i in gpu_infos])
class ToolButton(gr.Button, gr.components.FormComponent):
"""Small button with single emoji as text, fits inside gradio forms"""
def __init__(self, **kwargs):
super().__init__(variant="tool", **kwargs)
def get_block_name(self):
return "button"
weight_root = os.getenv("weight_root")
weight_uvr5_root = os.getenv("weight_uvr5_root")
index_root = os.getenv("index_root")
outside_index_root = os.getenv("outside_index_root")
names = []
for name in os.listdir(weight_root):
if name.endswith(".pth"):
names.append(name)
index_paths = []
def update_audio_path(uploaded_file):
# If no file was uploaded, return an empty string.
if uploaded_file is None:
return ""
# If multiple files were allowed, take the first one.
if isinstance(uploaded_file, list):
uploaded_file = uploaded_file[0]
# Depending on Gradio version, the file may be a dict or an object with a 'name' attribute.
if isinstance(uploaded_file, dict):
return uploaded_file.get("name", "")
if hasattr(uploaded_file, "name"):
return uploaded_file.name
return str(uploaded_file)
def lookup_indices(index_root):
global index_paths
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append("%s/%s" % (root, name))
lookup_indices(index_root)
lookup_indices(outside_index_root)
uvr5_names = []
for name in os.listdir(weight_uvr5_root):
if name.endswith(".pth") or "onnx" in name:
uvr5_names.append(name.replace(".pth", ""))
def change_choices():
names = []
for name in os.listdir(weight_root):
if name.endswith(".pth"):
names.append(name)
index_paths = []
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append("%s/%s" % (root, name))
return {"choices": sorted(names), "__type__": "update"}, {
"choices": sorted(index_paths),
"__type__": "update",
}
# Only works for NVIDIA GPUs
def clean():
return {"value": "", "__type__": "update"}
def export_onnx(ModelPath, ExportedPath):
from infer.modules.onnx.export import export_onnx as eo
eo(ModelPath, ExportedPath)
sr_dict = {
"32k": 32000,
"40k": 40000,
"48k": 48000,
}
def if_done(done, p):
while 1:
if p.poll() is None:
sleep(0.5)
else:
break
done[0] = True
def if_done_multi(done, ps):
while 1:
# poll() == None means the process has not finished
# keep waiting as long as any process is still running
flag = 1
for p in ps:
if p.poll() is None:
flag = 0
sleep(0.5)
break
if flag == 1:
break
done[0] = True
def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
sr = sr_dict[sr]
os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
f.close()
cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % (
config.python_cmd,
trainset_dir,
sr,
n_p,
now_dir,
exp_dir,
config.noparallel,
config.preprocess_per,
)
logger.info("Execute: " + cmd)
# , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
p = Popen(cmd, shell=True)
# Gradio reads Popen output only in one chunk after the process finishes; without Gradio it streams line by line. Work around it by polling a log file on a timer.
done = [False]
threading.Thread(
target=if_done,
args=(
done,
p,
),
).start()
while 1:
with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
yield (f.read())
sleep(1)
if done[0]:
break
with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
log = f.read()
logger.info(log)
yield log
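The pattern used here (and again in extract_f0_feature below) is: launch the worker with Popen, let a watcher thread flip a done flag when it exits, and stream the log file to Gradio on a one-second timer. A stripped-down sketch of just that plumbing (command and log path are placeholders):

import threading
from subprocess import Popen
from time import sleep

def stream_command(cmd, log_path):
    open(log_path, "w").close()  # truncate any stale log
    p = Popen(cmd, shell=True)
    done = [False]

    def watch():
        p.wait()
        done[0] = True

    threading.Thread(target=watch, daemon=True).start()
    while not done[0]:
        with open(log_path) as f:
            yield f.read()  # each yield re-renders the Gradio output box
        sleep(1)
    with open(log_path) as f:
        yield f.read()  # final read so the complete log is shown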
# but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe):
gpus = gpus.split("-")
os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
f.close()
if if_f0:
if f0method != "rmvpe_gpu":
cmd = (
'"%s" infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s'
% (
config.python_cmd,
now_dir,
exp_dir,
n_p,
f0method,
)
)
logger.info("Execute: " + cmd)
p = Popen(
cmd, shell=True, cwd=now_dir
) # , stdin=PIPE, stdout=PIPE,stderr=PIPE
# Gradio reads Popen output only in one chunk after the process finishes; without Gradio it streams line by line. Work around it by polling a log file on a timer.
done = [False]
threading.Thread(
target=if_done,
args=(
done,
p,
),
).start()
else:
if gpus_rmvpe != "-":
gpus_rmvpe = gpus_rmvpe.split("-")
leng = len(gpus_rmvpe)
ps = []
for idx, n_g in enumerate(gpus_rmvpe):
cmd = (
'"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s '
% (
config.python_cmd,
leng,
idx,
n_g,
now_dir,
exp_dir,
config.is_half,
)
)
logger.info("Execute: " + cmd)
p = Popen(
cmd, shell=True, cwd=now_dir
) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
ps.append(p)
# Gradio reads Popen output only in one chunk after the process finishes; without Gradio it streams line by line. Work around it by polling a log file on a timer.
done = [False]
threading.Thread(
target=if_done_multi, #
args=(
done,
ps,
),
).start()
else:
cmd = (
config.python_cmd
+ ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" '
% (
now_dir,
exp_dir,
)
)
logger.info("Execute: " + cmd)
p = Popen(
cmd, shell=True, cwd=now_dir
) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
p.wait()
done = [True]
while 1:
with open(
"%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
) as f:
yield (f.read())
sleep(1)
if done[0]:
break
with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
log = f.read()
logger.info(log)
yield log
# Spawn a separate process for each part
"""
n_part=int(sys.argv[1])
i_part=int(sys.argv[2])
i_gpu=sys.argv[3]
exp_dir=sys.argv[4]
os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
"""
leng = len(gpus)
ps = []
for idx, n_g in enumerate(gpus):
cmd = (
'"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s %s'
% (
config.python_cmd,
config.device,
leng,
idx,
n_g,
now_dir,
exp_dir,
version19,
config.is_half,
)
)
logger.info("Execute: " + cmd)
p = Popen(
cmd, shell=True, cwd=now_dir
) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
ps.append(p)
# Gradio reads Popen output only in one chunk after the process finishes; without Gradio it streams line by line. Work around it by polling a log file on a timer.
done = [False]
threading.Thread(
target=if_done_multi,
args=(
done,
ps,
),
).start()
while 1:
with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
yield (f.read())
sleep(1)
if done[0]:
break
with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
log = f.read()
logger.info(log)
yield log
def get_pretrained_models(path_str, f0_str, sr2):
if_pretrained_generator_exist = os.access(
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
)
if_pretrained_discriminator_exist = os.access(
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
)
if not if_pretrained_generator_exist:
logger.warning(
"assets/pretrained%s/%sG%s.pth not exist, will not use pretrained model",
path_str,
f0_str,
sr2,
)
if not if_pretrained_discriminator_exist:
logger.warning(
"assets/pretrained%s/%sD%s.pth not exist, will not use pretrained model",
path_str,
f0_str,
sr2,
)
return (
(
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_generator_exist
else ""
),
(
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
if if_pretrained_discriminator_exist
else ""
),
)
def change_sr2(sr2, if_f0_3, version19):
path_str = "" if version19 == "v1" else "_v2"
f0_str = "f0" if if_f0_3 else ""
return get_pretrained_models(path_str, f0_str, sr2)
def change_version19(sr2, if_f0_3, version19):
path_str = "" if version19 == "v1" else "_v2"
if sr2 == "32k" and version19 == "v1":
sr2 = "40k"
to_return_sr2 = (
{"choices": ["40k", "48k"], "__type__": "update", "value": sr2}
if version19 == "v1"
else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2}
)
f0_str = "f0" if if_f0_3 else ""
return (
*get_pretrained_models(path_str, f0_str, sr2),
to_return_sr2,
)
# f0method8,pretrained_G14,pretrained_D15
def change_f0(if_f0_3, sr2, version19):
path_str = "" if version19 == "v1" else "_v2"
return (
{"visible": if_f0_3, "__type__": "update"},
{"visible": if_f0_3, "__type__": "update"},
*get_pretrained_models(path_str, "f0" if if_f0_3 == True else "", sr2),
)
# ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
def change_info_(ckpt_path):
if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")):
return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
try:
with open(
ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r"
) as f:
info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
sr, f0 = info["sample_rate"], info["if_f0"]
version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
return sr, str(f0), version
except:
traceback.print_exc()
return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
F0GPUVisible = config.dml == False
def change_f0_method(f0method8):
if f0method8 == "rmvpe_gpu":
visible = F0GPUVisible
else:
visible = False
return {"visible": visible, "__type__": "update"}
with gr.Blocks(title="RVC WebUI") as app:
gr.Markdown("## RVC WebUI")
gr.Markdown(
value="This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible. <br>If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory <b>Agreement-LICENSE.txt</b> for details."
)
with gr.Blocks(title="Unified Inference UI") as app:
gr.Markdown("## Unified Inference UI: RVC WebUI and Spark TTS")
with gr.Tabs():
with gr.TabItem("Model Inference"):
with gr.Row():
sid0 = gr.Dropdown(label="Inferencing voice:", choices=sorted(names))
with gr.Column():
refresh_button = gr.Button(
"Refresh voice list and index path", variant="primary"
)
clean_button = gr.Button("Unload voice to save GPU memory:", variant="primary")
spk_item = gr.Slider(
minimum=0,
maximum=2333,
step=1,
label="Select Speaker/Singer ID:",
value=0,
visible=False,
interactive=True,
)
clean_button.click(
fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean"
)
with gr.TabItem("Single Inference"):
with gr.Group():
with gr.Row():
with gr.Column():
vc_transform0 = gr.Number(
label="Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):",
value=0,
)
# Add a file uploader for drag & drop.
audio_upload = gr.File(
label="拖拽或选择音频文件",
file_types=[".wav"],
file_count="single",
interactive=True,
)
# Existing textbox for the audio file path.
input_audio0 = gr.Textbox(
label="Enter the path of the audio file to be processed (default is the correct format example):",
placeholder="C:\\Users\\Desktop\\model_example.wav",
interactive=True,
)
# When a file is uploaded, update the textbox.
audio_upload.change(
fn=update_audio_path, inputs=audio_upload, outputs=input_audio0
)
file_index1 = gr.Textbox(
label="Path to the feature index file. Leave blank to use the selected result from the dropdown:",
placeholder="C:\\Users\\Desktop\\model_example.index",
interactive=True,
)
file_index2 = gr.Dropdown(
label="Auto-detect index path and select from the dropdown:",
choices=sorted(index_paths),
interactive=True,
)
f0method0 = gr.Radio(
label="Select the pitch extraction algorithm ('pm': faster extraction but lower-quality speech; 'harvest': better bass but extremely slow; 'crepe': better quality but GPU intensive), 'rmvpe': best quality, and little GPU requirement",
choices=(["pm", "harvest", "crepe", "rmvpe"] if config.dml == False else ["pm", "harvest", "rmvpe"]),
value="rmvpe",
interactive=True,
)
with gr.TabItem("RVC WebUI"):
# Render the RVC UI components
rvc_ui.render()
# with gr.TabItem("Spark TTS"):
# # Render the Spark UI components
# spark_ui.render()
return app
with gr.Column():
resample_sr0 = gr.Slider(
minimum=0,
maximum=48000,
label="Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling:",
value=0,
step=1,
interactive=True,
)
rms_mix_rate0 = gr.Slider(
minimum=0,
maximum=1,
label="Adjust the volume envelope scaling. Closer to 0, the more it mimicks the volume of the original vocals. Can help mask noise and make volume sound more natural when set relatively low. Closer to 1 will be more of a consistently loud volume:",
value=0.25,
interactive=True,
)
protect0 = gr.Slider(
minimum=0,
maximum=0.5,
label="Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy:",
value=0.33,
step=0.01,
interactive=True,
)
filter_radius0 = gr.Slider(
minimum=0,
maximum=7,
label="If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.",
value=3,
step=1,
interactive=True,
)
index_rate1 = gr.Slider(
minimum=0,
maximum=1,
label="Search feature ratio (controls accent strength, too high has artifacting):",
value=0.75,
interactive=True,
)
f0_file = gr.File(
label="F0 curve file (optional). One pitch per line. Replaces the default F0 and pitch modulation:",
visible=False,
)
refresh_button.click(
fn=change_choices,
inputs=[],
outputs=[sid0, file_index2],
api_name="infer_refresh",
)
with gr.Group():
with gr.Column():
but0 = gr.Button("Convert", variant="primary")
with gr.Row():
vc_output1 = gr.Textbox(label="Output information")
vc_output2 = gr.Audio(label="Export audio (click on the three dots in the lower right corner to download)")
but0.click(
vc.vc_single,
[
spk_item,
input_audio0,
vc_transform0,
f0_file,
f0method0,
file_index1,
file_index2,
index_rate1,
filter_radius0,
resample_sr0,
rms_mix_rate0,
protect0,
],
[vc_output1, vc_output2],
api_name="infer_convert",
)
with gr.TabItem("Batch Inference"):
gr.Markdown(
value="Batch conversion. Enter the folder containing the audio files to be converted or upload multiple audio files. The converted audio will be output in the specified folder (default: 'opt')."
)
with gr.Row():
with gr.Column():
vc_transform1 = gr.Number(
label="Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):",
value=0,
)
opt_input = gr.Textbox(
label="Specify output folder:", value="opt"
)
file_index3 = gr.Textbox(
label="Path to the feature index file. Leave blank to use the selected result from the dropdown:",
value="",
interactive=True,
)
file_index4 = gr.Dropdown(
label="Auto-detect index path and select from the dropdown:",
choices=sorted(index_paths),
interactive=True,
)
f0method1 = gr.Radio(
label="Select the pitch extraction algorithm ('pm': faster extraction but lower-quality speech; 'harvest': better bass but extremely slow; 'crepe': better quality but GPU intensive), 'rmvpe': best quality, and little GPU requirement",
choices=(
["pm", "harvest", "crepe", "rmvpe"]
if config.dml == False
else ["pm", "harvest", "rmvpe"]
),
value="rmvpe",
interactive=True,
)
format1 = gr.Radio(
label="Export file format",
choices=["wav", "flac", "mp3", "m4a"],
value="wav",
interactive=True,
)
refresh_button.click(
fn=lambda: change_choices()[1],
inputs=[],
outputs=file_index4,
api_name="infer_refresh_batch",
)
with gr.Column():
resample_sr1 = gr.Slider(
minimum=0,
maximum=48000,
label="Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling:",
value=0,
step=1,
interactive=True,
)
rms_mix_rate1 = gr.Slider(
minimum=0,
maximum=1,
label="Adjust the volume envelope scaling. Closer to 0, the more it mimicks the volume of the original vocals. Can help mask noise and make volume sound more natural when set relatively low. Closer to 1 will be more of a consistently loud volume:",
value=1,
interactive=True,
)
protect1 = gr.Slider(
minimum=0,
maximum=0.5,
label="Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy:",
value=0.33,
step=0.01,
interactive=True,
)
filter_radius1 = gr.Slider(
minimum=0,
maximum=7,
label="If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.",
value=3,
step=1,
interactive=True,
)
index_rate2 = gr.Slider(
minimum=0,
maximum=1,
label="Search feature ratio (controls accent strength, too high has artifacting):",
value=1,
interactive=True,
)
with gr.Row():
dir_input = gr.Textbox(
label="Enter the path of the audio folder to be processed (copy it from the address bar of the file manager):",
placeholder="C:\\Users\\Desktop\\input_vocal_dir",
)
inputs = gr.File(
file_count="multiple",
label="Multiple audio files can also be imported. If a folder path exists, this input is ignored.",
)
with gr.Row():
but1 = gr.Button("Convert", variant="primary")
vc_output3 = gr.Textbox(label="Output information")
but1.click(
vc.vc_multi,
[
spk_item,
dir_input,
opt_input,
inputs,
vc_transform1,
f0method1,
file_index3,
file_index4,
index_rate2,
filter_radius1,
resample_sr1,
rms_mix_rate1,
protect1,
format1,
],
[vc_output3],
api_name="infer_convert_batch",
)
sid0.change(
fn=vc.get_vc,
inputs=[sid0, protect0, protect1],
outputs=[spk_item, protect0, protect1, file_index2, file_index4],
api_name="infer_change_voice",
)
if __name__ == "__main__":
app = build_unified_ui()
# Needed for RVC
if config.iscolab:
app.queue(concurrency_count=511, max_size=1022).launch(share=True)
else:
@@ -765,3 +33,5 @@ with gr.Blocks(title="RVC WebUI") as app:
server_port=config.listen_port,
quiet=True,
)

infer/modules/onnx/export.py

@@ -3,6 +3,7 @@ import onnxsim
import onnx
from infer.lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
def export_onnx(ModelPath, ExportedPath):
cpt = torch.load(ModelPath, map_location="cpu")
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]

infer/modules/uvr5/vr.py

@@ -146,7 +146,9 @@ class AudioPre:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
os.system('ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path))
os.system(
'ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path)
)
if os.path.exists(opt_format_path):
try:
os.remove(path)
@@ -187,7 +189,9 @@ class AudioPre:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
os.system('ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path))
os.system(
'ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path)
)
if os.path.exists(opt_format_path):
try:
os.remove(path)
@@ -323,7 +327,9 @@ class AudioPreDeEcho:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
os.system('ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path))
os.system(
'ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path)
)
if os.path.exists(opt_format_path):
try:
os.remove(path)
@@ -360,7 +366,9 @@ class AudioPreDeEcho:
)
if os.path.exists(path):
opt_format_path = path[:-4] + ".%s" % format
os.system('ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path))
os.system(
'ffmpeg -i "%s" -vn "%s" -q:a 2 -y' % (path, opt_format_path)
)
if os.path.exists(opt_format_path):
try:
os.remove(path)

infer/modules/vc/modules.py

@@ -18,6 +18,7 @@ from infer.lib.infer_pack.models import (
from infer.modules.vc.pipeline import Pipeline
from infer.modules.vc.utils import *
class VC:
def __init__(self, config):
self.n_spk = None

infer/modules/vc/pipeline.py

@@ -40,7 +40,9 @@ def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
return f0
def change_rms(data1, sr1, data2, sr2, rate): # data1: input audio; data2: output audio; rate: data2's proportion
def change_rms(
data1, sr1, data2, sr2, rate
): # data1: input audio; data2: output audio; rate: data2's proportion
# print(data1.max(),data2.max())
rms1 = librosa.feature.rms(
y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2
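For context on this helper: change_rms blends the loudness envelope of the input audio into the converted output, with rate giving the output's own share. A minimal NumPy sketch of the idea (the real pipeline.py version interpolates with torch; this mirrors the formula, not the code):

import numpy as np
import librosa

def change_rms_sketch(data1, sr1, data2, sr2, rate):
    # data1: input audio; data2: output audio; rate: proportion kept from data2
    rms1 = librosa.feature.rms(y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2)[0]
    rms2 = librosa.feature.rms(y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2)[0]
    # Upsample both envelopes to one value per output sample.
    x = np.linspace(0.0, 1.0, num=len(data2))
    rms1 = np.interp(x, np.linspace(0.0, 1.0, num=len(rms1)), rms1)
    rms2 = np.interp(x, np.linspace(0.0, 1.0, num=len(rms2)), rms2)
    rms2 = np.maximum(rms2, 1e-6)  # avoid blow-ups on silent frames
    # rate = 1 leaves data2 untouched; rate = 0 imposes data1's envelope.
    return data2 * (rms1 ** (1 - rate)) * (rms2 ** (rate - 1))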

modules/rvc_ui/__init__.py Normal file

111 modules/rvc_ui/initialization.py Normal file

@@ -0,0 +1,111 @@
# rvc_ui/initialization.py
import os
import sys
from dotenv import load_dotenv
import torch
import fairseq
import warnings
import shutil
import logging
# Set current directory and load environment variables
now_dir = os.getcwd()
sys.path.append(now_dir)
load_dotenv()
# Configure logging and warnings
logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
# Cleanup and create necessary directories
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree(f"{now_dir}/runtime/Lib/site-packages/infer_pack", ignore_errors=True)
shutil.rmtree(f"{now_dir}/runtime/Lib/site-packages/uvr5_pack", ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)
os.environ["TEMP"] = tmp
warnings.filterwarnings("ignore")
torch.manual_seed(114514)
# Import your configuration and voice conversion modules
from configs.config import Config
from infer.modules.vc.modules import VC
# Instantiate configuration and VC
config = Config()
vc = VC(config)
# Optionally override fairseq grad multiply if dml is enabled
if config.dml:
def forward_dml(ctx, x, scale):
ctx.scale = scale
return x.clone().detach()
fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
# GPU detection and info collection
ngpu = torch.cuda.device_count()
gpu_infos = []
mem = []
if_gpu_ok = False
if torch.cuda.is_available() or ngpu != 0:
for i in range(ngpu):
gpu_name = torch.cuda.get_device_name(i)
if any(
value in gpu_name.upper()
for value in [
"10",
"16",
"20",
"30",
"40",
"A2",
"A3",
"A4",
"P4",
"A50",
"500",
"A60",
"70",
"80",
"90",
"M4",
"T4",
"TITAN",
"4060",
"L",
"6000",
]
):
if_gpu_ok = True
gpu_infos.append(f"{i}\t{gpu_name}")
mem.append(
int(torch.cuda.get_device_properties(i).total_memory / (1024**3) + 0.4)
)
if if_gpu_ok and gpu_infos:
gpu_info = "\n".join(gpu_infos)
default_batch_size = min(mem) // 2
else:
gpu_info = (
"Unfortunately, there is no compatible GPU available to support your training."
)
default_batch_size = 1
gpus = "-".join([i[0] for i in gpu_infos])
# Expose useful variables for other modules
__all__ = [
"now_dir",
"config",
"vc",
"gpu_info",
"default_batch_size",
"gpus",
"logger",
]

333 modules/rvc_ui/main.py Normal file

@@ -0,0 +1,333 @@
# rvc_ui/main.py
import os
import gradio as gr
import shutil
from time import sleep
# Import modules from your package
from rvc_ui.initialization import now_dir, config, vc
from rvc_ui.utils import (
update_audio_path,
lookup_indices,
change_choices,
clean,
ToolButton,
)
# Setup weight and index paths from environment variables
weight_root = os.getenv("weight_root")
weight_uvr5_root = os.getenv("weight_uvr5_root")
index_root = os.getenv("index_root")
outside_index_root = os.getenv("outside_index_root")
# Prepare model names and index paths
names = [name for name in os.listdir(weight_root) if name.endswith(".pth")]
index_paths = []
lookup_indices(index_root, index_paths)
lookup_indices(outside_index_root, index_paths)
uvr5_names = [
name.replace(".pth", "")
for name in os.listdir(weight_uvr5_root)
if name.endswith(".pth") or "onnx" in name
]
# Define additional dictionaries and UI functions if needed
sr_dict = {"32k": 32000, "40k": 40000, "48k": 48000}
F0GPUVisible = config.dml == False
# Build Gradio UI
def build_rvc_ui():
with gr.Blocks(title="RVC WebUI") as rvc_ui:
gr.Markdown("## RVC WebUI")
gr.Markdown(
value="This software is open source under the MIT license. The author does not have any control over the software. Users who use the software and distribute the sounds exported by the software are solely responsible. <br>If you do not agree with this clause, you cannot use or reference any codes and files within the software package. See the root directory <b>Agreement-LICENSE.txt</b> for details."
)
with gr.Tabs():
with gr.TabItem("Model Inference"):
with gr.Row():
sid0 = gr.Dropdown(
label="Inferencing voice:", choices=sorted(names)
)
with gr.Column():
refresh_button = gr.Button(
"Refresh voice list and index path", variant="primary"
)
clean_button = gr.Button(
"Unload voice to save GPU memory:", variant="primary"
)
spk_item = gr.Slider(
minimum=0,
maximum=2333,
step=1,
label="Select Speaker/Singer ID:",
value=0,
visible=False,
interactive=True,
)
clean_button.click(
fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean"
)
with gr.TabItem("Single Inference"):
with gr.Group():
with gr.Row():
with gr.Column():
vc_transform0 = gr.Number(
label="Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):",
value=0,
)
# Add a file uploader for drag & drop.
audio_upload = gr.File(
label="拖拽或选择音频文件",
file_types=[".wav"],
file_count="single",
interactive=True,
)
# Existing textbox for the audio file path.
input_audio0 = gr.Textbox(
label="Enter the path of the audio file to be processed (default is the correct format example):",
placeholder="C:\\Users\\Desktop\\model_example.wav",
interactive=True,
)
# When a file is uploaded, update the textbox.
audio_upload.change(
fn=update_audio_path,
inputs=audio_upload,
outputs=input_audio0,
)
file_index1 = gr.Textbox(
label="Path to the feature index file. Leave blank to use the selected result from the dropdown:",
placeholder="C:\\Users\\Desktop\\model_example.index",
interactive=True,
)
file_index2 = gr.Dropdown(
label="Auto-detect index path and select from the dropdown:",
choices=sorted(index_paths),
interactive=True,
)
f0method0 = gr.Radio(
label="Select the pitch extraction algorithm ('pm': faster extraction but lower-quality speech; 'harvest': better bass but extremely slow; 'crepe': better quality but GPU intensive), 'rmvpe': best quality, and little GPU requirement",
choices=(
["pm", "harvest", "crepe", "rmvpe"]
if config.dml == False
else ["pm", "harvest", "rmvpe"]
),
value="rmvpe",
interactive=True,
)
with gr.Column():
resample_sr0 = gr.Slider(
minimum=0,
maximum=48000,
label="Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling:",
value=0,
step=1,
interactive=True,
)
rms_mix_rate0 = gr.Slider(
minimum=0,
maximum=1,
label="Adjust the volume envelope scaling. Closer to 0, the more it mimicks the volume of the original vocals. Can help mask noise and make volume sound more natural when set relatively low. Closer to 1 will be more of a consistently loud volume:",
value=0.25,
interactive=True,
)
protect0 = gr.Slider(
minimum=0,
maximum=0.5,
label="Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy:",
value=0.33,
step=0.01,
interactive=True,
)
filter_radius0 = gr.Slider(
minimum=0,
maximum=7,
label="If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.",
value=3,
step=1,
interactive=True,
)
index_rate1 = gr.Slider(
minimum=0,
maximum=1,
label="Search feature ratio (controls accent strength, too high has artifacting):",
value=0.75,
interactive=True,
)
f0_file = gr.File(
label="F0 curve file (optional). One pitch per line. Replaces the default F0 and pitch modulation:",
visible=False,
)
refresh_button.click(
fn=change_choices,
inputs=[],
outputs=[sid0, file_index2],
api_name="infer_refresh",
)
with gr.Group():
with gr.Column():
but0 = gr.Button("Convert", variant="primary")
with gr.Row():
vc_output1 = gr.Textbox(label="Output information")
vc_output2 = gr.Audio(
label="Export audio (click on the three dots in the lower right corner to download)"
)
but0.click(
vc.vc_single,
[
spk_item,
input_audio0,
vc_transform0,
f0_file,
f0method0,
file_index1,
file_index2,
index_rate1,
filter_radius0,
resample_sr0,
rms_mix_rate0,
protect0,
],
[vc_output1, vc_output2],
api_name="infer_convert",
)
with gr.TabItem("Batch Inference"):
gr.Markdown(
value="Batch conversion. Enter the folder containing the audio files to be converted or upload multiple audio files. The converted audio will be output in the specified folder (default: 'opt')."
)
with gr.Row():
with gr.Column():
vc_transform1 = gr.Number(
label="Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):",
value=0,
)
opt_input = gr.Textbox(
label="Specify output folder:", value="opt"
)
file_index3 = gr.Textbox(
label="Path to the feature index file. Leave blank to use the selected result from the dropdown:",
value="",
interactive=True,
)
file_index4 = gr.Dropdown(
label="Auto-detect index path and select from the dropdown:",
choices=sorted(index_paths),
interactive=True,
)
f0method1 = gr.Radio(
label="Select the pitch extraction algorithm ('pm': faster extraction but lower-quality speech; 'harvest': better bass but extremely slow; 'crepe': better quality but GPU intensive), 'rmvpe': best quality, and little GPU requirement",
choices=(
["pm", "harvest", "crepe", "rmvpe"]
if config.dml == False
else ["pm", "harvest", "rmvpe"]
),
value="rmvpe",
interactive=True,
)
format1 = gr.Radio(
label="Export file format",
choices=["wav", "flac", "mp3", "m4a"],
value="wav",
interactive=True,
)
refresh_button.click(
fn=lambda: change_choices()[1],
inputs=[],
outputs=file_index4,
api_name="infer_refresh_batch",
)
with gr.Column():
resample_sr1 = gr.Slider(
minimum=0,
maximum=48000,
label="Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling:",
value=0,
step=1,
interactive=True,
)
rms_mix_rate1 = gr.Slider(
minimum=0,
maximum=1,
label="Adjust the volume envelope scaling. Closer to 0, the more it mimicks the volume of the original vocals. Can help mask noise and make volume sound more natural when set relatively low. Closer to 1 will be more of a consistently loud volume:",
value=1,
interactive=True,
)
protect1 = gr.Slider(
minimum=0,
maximum=0.5,
label="Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy:",
value=0.33,
step=0.01,
interactive=True,
)
filter_radius1 = gr.Slider(
minimum=0,
maximum=7,
label="If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.",
value=3,
step=1,
interactive=True,
)
index_rate2 = gr.Slider(
minimum=0,
maximum=1,
label="Search feature ratio (controls accent strength, too high has artifacting):",
value=1,
interactive=True,
)
with gr.Row():
dir_input = gr.Textbox(
label="Enter the path of the audio folder to be processed (copy it from the address bar of the file manager):",
placeholder="C:\\Users\\Desktop\\input_vocal_dir",
)
inputs = gr.File(
file_count="multiple",
label="Multiple audio files can also be imported. If a folder path exists, this input is ignored.",
)
with gr.Row():
but1 = gr.Button("Convert", variant="primary")
vc_output3 = gr.Textbox(label="Output information")
but1.click(
vc.vc_multi,
[
spk_item,
dir_input,
opt_input,
inputs,
vc_transform1,
f0method1,
file_index3,
file_index4,
index_rate2,
filter_radius1,
resample_sr1,
rms_mix_rate1,
protect1,
format1,
],
[vc_output3],
api_name="infer_convert_batch",
)
sid0.change(
fn=vc.get_vc,
inputs=[sid0, protect0, protect1],
outputs=[
spk_item,
protect0,
protect1,
file_index2,
file_index4,
],
api_name="infer_change_voice",
)
return rvc_ui
if __name__ == "__main__":
build_rvc_ui()

61 modules/rvc_ui/utils.py Normal file

@@ -0,0 +1,61 @@
# rvc_ui/utils.py
import os
import gradio as gr
# Function to update the audio path when a file is uploaded
def update_audio_path(uploaded_file):
if uploaded_file is None:
return ""
if isinstance(uploaded_file, list):
uploaded_file = uploaded_file[0]
if isinstance(uploaded_file, dict):
return uploaded_file.get("name", "")
if hasattr(uploaded_file, "name"):
return uploaded_file.name
return str(uploaded_file)
# Function to lookup index files in a given directory
def lookup_indices(index_root, index_paths):
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append(f"{root}/{name}")
# Function to refresh available model and index choices
def change_choices(weight_root=None, index_root=None):
# Default to the env-configured roots so Gradio callbacks can call this with no arguments.
weight_root = weight_root or os.getenv("weight_root")
index_root = index_root or os.getenv("index_root")
names = [name for name in os.listdir(weight_root) if name.endswith(".pth")]
index_paths = []
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append(f"{root}/{name}")
return {"choices": sorted(names), "__type__": "update"}, {
"choices": sorted(index_paths),
"__type__": "update",
}
# Custom Gradio ToolButton component
class ToolButton(gr.Button, gr.components.FormComponent):
def __init__(self, **kwargs):
super().__init__(variant="tool", **kwargs)
def get_block_name(self):
return "button"
# Simple clean function to reset a field (used for GPU memory management)
def clean():
return {"value": "", "__type__": "update"}
__all__ = [
"update_audio_path",
"lookup_indices",
"change_choices",
"ToolButton",
"clean",
]

66 modules/rvc_ui/utils.pyi Normal file

@@ -0,0 +1,66 @@
# rvc_ui/utils.py
import os
import gradio as gr
# Function to update the audio path when a file is uploaded
def update_audio_path(uploaded_file):
if uploaded_file is None:
return ""
if isinstance(uploaded_file, list):
uploaded_file = uploaded_file[0]
if isinstance(uploaded_file, dict):
return uploaded_file.get("name", "")
if hasattr(uploaded_file, "name"):
return uploaded_file.name
return str(uploaded_file)
# Function to lookup index files in a given directory
def lookup_indices(index_root, index_paths):
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append(f"{root}/{name}")
# Function to refresh available model and index choices
def change_choices(weight_root=None, index_root=None):
# Default to the env-configured roots so Gradio callbacks can call this with no arguments.
weight_root = weight_root or os.getenv("weight_root")
index_root = index_root or os.getenv("index_root")
names = [name for name in os.listdir(weight_root) if name.endswith(".pth")]
index_paths = []
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append(f"{root}/{name}")
return {"choices": sorted(names), "__type__": "update"}, {
"choices": sorted(index_paths),
"__type__": "update",
}
from gradio.events import Dependency
# Custom Gradio ToolButton component
class ToolButton(gr.Button, gr.components.FormComponent):
def __init__(self, **kwargs):
super().__init__(variant="tool", **kwargs)
def get_block_name(self):
return "button"
from typing import Callable, Literal, Sequence, Any, TYPE_CHECKING
from gradio.blocks import Block
if TYPE_CHECKING:
from gradio.components import Timer
# Simple clean function to reset a field (used for GPU memory management)
def clean():
return {"value": "", "__type__": "update"}
__all__ = [
"update_audio_path",
"lookup_indices",
"change_choices",
"ToolButton",
"clean",
]

1386 poetry.lock generated

File diff suppressed because it is too large.

pyproject.toml

@@ -1,9 +1,17 @@
[tool.poetry]
name = "spark-rvc-inference-module"
version = "0.1.0"
description = "Spark TTS with RVC inference module"
description = "Spark TTS with RVC inference module. A unification of the RVC inference module and Spark TTS projects."
authors = ["vitalii.slobolinskyi"]
package-mode = false
license = "MIT"
readme = "README.md"
homepage = "https://github.com/VSlobolinskyi/spark-rvc-inference-module"
repository = "https://github.com/VSlobolinskyi/spark-rvc-inference-module"
[[tool.poetry.packages]]
include = "rvc_ui"
from = "modules"
[tool.poetry.dependencies]
python = ">=3.11,<3.12"
@@ -60,7 +68,13 @@ torch-directml = "^0.2.5.dev240914"
autoflake = "^2.3.1"
[tool.poetry.group.dev.dependencies]
# Add any development dependencies here
black = "^25.1.0"
pytest = "^7.0"
[tool.black]
line-length = 88
target-version = ['py38']
include = '\.pyi?$'
[build-system]
requires = ["poetry-core>=1.0.0"]
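A side note on the packages stanza above: declaring include = "rvc_ui", from = "modules" is what lets the slimmed-down entry point import the split-out UI directly. After poetry install, for example:

# These imports resolve because modules/ is registered as a package root.
from rvc_ui.initialization import now_dir, config, vc
from rvc_ui.main import build_rvc_ui

app = build_rvc_ui()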

403 struct.txt Normal file

@@ -0,0 +1,403 @@
├── .env
├── .gitignore
├── .vscode
│ └── settings.json
├── assets
│ ├── hubert
│ │ ├── .gitignore
│ │ ├── hubert_base.pt
│ │ └── hubert_inputs.pth
│ ├── indices
│ │ └── .gitignore
│ ├── pretrained
│ │ ├── .gitignore
│ │ ├── D32k.pth
│ │ ├── D40k.pth
│ │ ├── D48k.pth
│ │ ├── f0D32k.pth
│ │ ├── f0D40k.pth
│ │ ├── f0D48k.pth
│ │ ├── f0G32k.pth
│ │ ├── f0G40k.pth
│ │ ├── f0G48k.pth
│ │ ├── G32k.pth
│ │ ├── G40k.pth
│ │ └── G48k.pth
│ ├── pretrained_v2
│ │ ├── .gitignore
│ │ ├── D32k.pth
│ │ ├── D40k.pth
│ │ ├── D48k.pth
│ │ ├── f0D32k.pth
│ │ ├── f0D40k.pth
│ │ ├── f0D48k.pth
│ │ ├── f0G32k.pth
│ │ ├── f0G40k.pth
│ │ ├── f0G48k.pth
│ │ ├── G32k.pth
│ │ ├── G40k.pth
│ │ └── G48k.pth
│ ├── rmvpe
│ │ ├── .gitignore
│ │ ├── rmvpe.onnx
│ │ ├── rmvpe.pt
│ │ └── rmvpe_inputs.pth
│ ├── Synthesizer_inputs.pth
│ ├── uvr5_weights
│ │ ├── .gitignore
│ │ ├── HP2-%E4%BA%BA%E5%A3%B0vocals%2B%E9%9D%9E%E4%BA%BA%E5%A3%B0instrumentals.pth
│ │ ├── HP2_all_vocals.pth
│ │ ├── HP3_all_vocals.pth
│ │ ├── HP5-%E4%B8%BB%E6%97%8B%E5%BE%8B%E4%BA%BA%E5%A3%B0vocals%2B%E5%85%B6%E4%BB%96instrumentals.pth
│ │ ├── HP5_only_main_vocal.pth
│ │ ├── onnx_dereverb_By_FoxJoy
│ │ │ └── vocals.onnx
│ │ ├── VR-DeEchoAggressive.pth
│ │ ├── VR-DeEchoDeReverb.pth
│ │ └── VR-DeEchoNormal.pth
│ └── weights
│ ├── .gitignore
│ └── SilverWolf_e300_s6600.pth
├── configs
│ ├── config.json
│ ├── config.py
│ ├── inuse
│ │ ├── .gitignore
│ │ ├── v1
│ │ │ ├── .gitignore
│ │ │ ├── 32k.json
│ │ │ ├── 40k.json
│ │ │ └── 48k.json
│ │ └── v2
│ │ ├── .gitignore
│ │ ├── 32k.json
│ │ └── 48k.json
│ ├── v1
│ │ ├── 32k.json
│ │ ├── 40k.json
│ │ └── 48k.json
│ ├── v2
│ │ ├── 32k.json
│ │ └── 48k.json
│ └── __pycache__
│ └── config.cpython-311.pyc
├── configure_gpu_deps.py
├── ffmpeg.exe
├── ffprobe.exe
├── generate-structure.js
├── infer
│ ├── lib
│ │ ├── audio.py
│ │ ├── infer_pack
│ │ │ ├── attentions.py
│ │ │ ├── attentions_onnx.py
│ │ │ ├── commons.py
│ │ │ ├── models.py
│ │ │ ├── models_onnx.py
│ │ │ ├── modules
│ │ │ │ └── F0Predictor
│ │ │ │ ├── DioF0Predictor.py
│ │ │ │ ├── F0Predictor.py
│ │ │ │ ├── HarvestF0Predictor.py
│ │ │ │ ├── PMF0Predictor.py
│ │ │ │ └── __init__.py
│ │ │ ├── modules.py
│ │ │ ├── onnx_inference.py
│ │ │ ├── transforms.py
│ │ │ └── __pycache__
│ │ │ ├── attentions.cpython-311.pyc
│ │ │ ├── commons.cpython-311.pyc
│ │ │ ├── models.cpython-311.pyc
│ │ │ ├── modules.cpython-311.pyc
│ │ │ └── transforms.cpython-311.pyc
│ │ ├── jit
│ │ │ ├── get_hubert.py
│ │ │ ├── get_rmvpe.py
│ │ │ ├── get_synthesizer.py
│ │ │ ├── __init__.py
│ │ │ └── __pycache__
│ │ │ └── __init__.cpython-311.pyc
│ │ ├── rmvpe.py
│ │ ├── rtrvc.py
│ │ ├── slicer2.py
│ │ ├── uvr5_pack
│ │ │ ├── lib_v5
│ │ │ │ ├── dataset.py
│ │ │ │ ├── layers.py
│ │ │ │ ├── layers_123812KB .py
│ │ │ │ ├── layers_123821KB.py
│ │ │ │ ├── layers_33966KB.py
│ │ │ │ ├── layers_537227KB.py
│ │ │ │ ├── layers_537238KB.py
│ │ │ │ ├── layers_new.py
│ │ │ │ ├── modelparams
│ │ │ │ │ ├── 1band_sr16000_hl512.json
│ │ │ │ │ ├── 1band_sr32000_hl512.json
│ │ │ │ │ ├── 1band_sr33075_hl384.json
│ │ │ │ │ ├── 1band_sr44100_hl1024.json
│ │ │ │ │ ├── 1band_sr44100_hl256.json
│ │ │ │ │ ├── 1band_sr44100_hl512.json
│ │ │ │ │ ├── 1band_sr44100_hl512_cut.json
│ │ │ │ │ ├── 2band_32000.json
│ │ │ │ │ ├── 2band_44100_lofi.json
│ │ │ │ │ ├── 2band_48000.json
│ │ │ │ │ ├── 3band_44100.json
│ │ │ │ │ ├── 3band_44100_mid.json
│ │ │ │ │ ├── 3band_44100_msb2.json
│ │ │ │ │ ├── 4band_44100.json
│ │ │ │ │ ├── 4band_44100_mid.json
│ │ │ │ │ ├── 4band_44100_msb.json
│ │ │ │ │ ├── 4band_44100_msb2.json
│ │ │ │ │ ├── 4band_44100_reverse.json
│ │ │ │ │ ├── 4band_44100_sw.json
│ │ │ │ │ ├── 4band_v2.json
│ │ │ │ │ ├── 4band_v2_sn.json
│ │ │ │ │ ├── 4band_v3.json
│ │ │ │ │ └── ensemble.json
│ │ │ │ ├── model_param_init.py
│ │ │ │ ├── nets.py
│ │ │ │ ├── nets_123812KB.py
│ │ │ │ ├── nets_123821KB.py
│ │ │ │ ├── nets_33966KB.py
│ │ │ │ ├── nets_537227KB.py
│ │ │ │ ├── nets_537238KB.py
│ │ │ │ ├── nets_61968KB.py
│ │ │ │ ├── nets_new.py
│ │ │ │ └── spec_utils.py
│ │ │ ├── name_params.json
│ │ │ └── utils.py
│ │ └── __pycache__
│ │ ├── audio.cpython-311.pyc
│ │ └── rmvpe.cpython-311.pyc
│ └── modules
│ ├── ipex
│ │ ├── attention.py
│ │ ├── gradscaler.py
│ │ ├── hijacks.py
│ │ └── __init__.py
│ ├── onnx
│ │ └── export.py
│ ├── train
│ │ ├── extract
│ │ │ ├── extract_f0_print.py
│ │ │ ├── extract_f0_rmvpe.py
│ │ │ └── extract_f0_rmvpe_dml.py
│ │ ├── extract_feature_print.py
│ │ ├── preprocess.py
│ │ └── train.py
│ ├── uvr5
│ │ ├── mdxnet.py
│ │ └── vr.py
│ └── vc
│ ├── modules.py
│ ├── pipeline.py
│ ├── utils.py
│ ├── __init__.py
│ └── __pycache__
│ ├── modules.cpython-311.pyc
│ ├── pipeline.cpython-311.pyc
│ ├── utils.cpython-311.pyc
│ └── __init__.cpython-311.pyc
├── infer-web.py
├── LICENSE
├── logs
│ ├── added_IVF611_Flat_nprobe_1_SilverWolf_v2.index
│ └── mute
│ ├── 0_gt_wavs
│ │ ├── mute32k.wav
│ │ ├── mute40k.wav
│ │ └── mute48k.wav
│ ├── 1_16k_wavs
│ │ └── mute.wav
│ ├── 2a_f0
│ │ └── mute.wav.npy
│ ├── 2b-f0nsf
│ │ └── mute.wav.npy
│ ├── 3_feature256
│ │ └── mute.npy
│ └── 3_feature768
│ └── mute.npy
├── modules
│ ├── rvc_ui
│ │ ├── initialization.py
│ │ ├── main.py
│ │ ├── utils.py
│ │ └── __init__.py
│ └── spark_ui
│ ├── main.py
│ └── __init__.py
├── poetry.lock
├── pyproject.toml
├── README.md
├── spark
│ ├── cli
│ │ ├── inference.py
│ │ ├── spark.sparktts.py
│ │ └── __pycache__
│ │ ├── spark.sparktts.cpython-311.pyc
│ │ └── spark.sparktts.cpython-312.pyc
│ ├── LICENSE
│ ├── pretrained_models
│ │ └── Spark-TTS-0.5B
│ │ ├── .gitattributes
│ │ ├── BiCodec
│ │ │ ├── config.yaml
│ │ │ └── model.safetensors
│ │ ├── config.yaml
│ │ ├── LLM
│ │ │ ├── added_tokens.json
│ │ │ ├── config.json
│ │ │ ├── merges.txt
│ │ │ ├── model.safetensors
│ │ │ ├── special_tokens_map.json
│ │ │ ├── tokenizer.json
│ │ │ ├── tokenizer_config.json
│ │ │ └── vocab.json
│ │ ├── README.md
│ │ ├── src
│ │ │ ├── figures
│ │ │ │ ├── gradio_control.png
│ │ │ │ ├── gradio_TTS.png
│ │ │ │ ├── infer_control.png
│ │ │ │ └── infer_voice_cloning.png
│ │ │ └── logo
│ │ │ ├── HKUST.jpg
│ │ │ ├── mobvoi.jpg
│ │ │ ├── mobvoi.png
│ │ │ ├── NPU.jpg
│ │ │ ├── NTU.jpg
│ │ │ ├── SJU.jpg
│ │ │ ├── SparkAudio.jpg
│ │ │ ├── SparkAudio2.jpg
│ │ │ ├── spark.sparktts.jpg
│ │ │ └── spark.sparktts.png
│ │ └── wav2vec2-large-xlsr-53
│ │ ├── config.json
│ │ ├── preprocessor_config.json
│ │ ├── pytorch_model.bin
│ │ └── README.md
│ ├── runtime
│ │ └── triton_trtllm
│ │ ├── client_grpc.py
│ │ ├── client_http.py
│ │ ├── model_repo
│ │ │ ├── audio_tokenizer
│ │ │ │ ├── 1
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ ├── spark_tts
│ │ │ │ ├── 1
│ │ │ │ │ └── model.py
│ │ │ │ └── config.pbtxt
│ │ │ ├── tensorrt_llm
│ │ │ │ ├── 1
│ │ │ │ │ └── .gitkeep
│ │ │ │ └── config.pbtxt
│ │ │ └── vocoder
│ │ │ ├── 1
│ │ │ │ └── model.py
│ │ │ └── config.pbtxt
│ │ ├── README.md
│ │ └── scripts
│ │ ├── convert_checkpoint.py
│ │ └── fill_template.py
│ └── sparktts
│ ├── models
│ │ ├── audio_tokenizer.py
│ │ ├── bicodec.py
│ │ └── __pycache__
│ │ ├── audio_tokenizer.cpython-311.pyc
│ │ ├── audio_tokenizer.cpython-312.pyc
│ │ ├── bicodec.cpython-311.pyc
│ │ └── bicodec.cpython-312.pyc
│ ├── modules
│ │ ├── blocks
│ │ │ ├── layers.py
│ │ │ ├── samper.py
│ │ │ ├── vocos.py
│ │ │ └── __pycache__
│ │ │ ├── layers.cpython-311.pyc
│ │ │ ├── layers.cpython-312.pyc
│ │ │ ├── samper.cpython-311.pyc
│ │ │ ├── samper.cpython-312.pyc
│ │ │ ├── vocos.cpython-311.pyc
│ │ │ └── vocos.cpython-312.pyc
│ │ ├── encoder_decoder
│ │ │ ├── feat_decoder.py
│ │ │ ├── feat_encoder.py
│ │ │ ├── wave_generator.py
│ │ │ └── __pycache__
│ │ │ ├── feat_decoder.cpython-311.pyc
│ │ │ ├── feat_decoder.cpython-312.pyc
│ │ │ ├── feat_encoder.cpython-311.pyc
│ │ │ ├── feat_encoder.cpython-312.pyc
│ │ │ ├── wave_generator.cpython-311.pyc
│ │ │ └── wave_generator.cpython-312.pyc
│ │ ├── fsq
│ │ │ ├── finite_scalar_quantization.py
│ │ │ ├── residual_fsq.py
│ │ │ └── __pycache__
│ │ │ ├── finite_scalar_quantization.cpython-311.pyc
│ │ │ ├── finite_scalar_quantization.cpython-312.pyc
│ │ │ ├── residual_fsq.cpython-311.pyc
│ │ │ └── residual_fsq.cpython-312.pyc
│ │ ├── speaker
│ │ │ ├── ecapa_tdnn.py
│ │ │ ├── perceiver_encoder.py
│ │ │ ├── pooling_layers.py
│ │ │ ├── speaker_encoder.py
│ │ │ └── __pycache__
│ │ │ ├── ecapa_tdnn.cpython-311.pyc
│ │ │ ├── ecapa_tdnn.cpython-312.pyc
│ │ │ ├── perceiver_encoder.cpython-311.pyc
│ │ │ ├── perceiver_encoder.cpython-312.pyc
│ │ │ ├── pooling_layers.cpython-311.pyc
│ │ │ ├── pooling_layers.cpython-312.pyc
│ │ │ ├── speaker_encoder.cpython-311.pyc
│ │ │ └── speaker_encoder.cpython-312.pyc
│ │ └── vq
│ │ ├── factorized_vector_quantize.py
│ │ └── __pycache__
│ │ ├── factorized_vector_quantize.cpython-311.pyc
│ │ └── factorized_vector_quantize.cpython-312.pyc
│ └── utils
│ ├── audio.py
│ ├── file.py
│ ├── token_parser.py
│ ├── __init__.py
│ └── __pycache__
│ ├── audio.cpython-311.pyc
│ ├── audio.cpython-312.pyc
│ ├── file.cpython-311.pyc
│ ├── file.cpython-312.pyc
│ ├── token_parser.cpython-311.pyc
│ ├── token_parser.cpython-312.pyc
│ ├── __init__.cpython-311.pyc
│ └── __init__.cpython-312.pyc
├── struct.txt
├── TEMP
├── temp_tools
│ ├── analize_deps.py
│ ├── clean_i18n.py
│ ├── join.py
│ ├── remap_i18n.py
│ ├── remove_unused_imports.py
│ └── used_dependencies.txt
└── tools
├── app.py
├── calc_rvc_model_similarity.py
├── download_assets.py
├── download_model.py
├── export_onnx.py
├── infer
│ ├── infer-pm-index256.py
│ ├── train-index-v2.py
│ ├── train-index.py
│ └── trans_weights.py
├── infer_batch_rvc.py
├── infer_cli.py
├── onnx_inference_demo.py
└── torchgate
├── torchgate.py
├── utils.py
└── __init__.py

temp_tools/analize_deps.py

@@ -2,6 +2,7 @@ import os
import ast
from collections import defaultdict
class CodeAnalyzer(ast.NodeVisitor):
def __init__(self):
# Stores plain "import module" names.
@@ -45,12 +46,13 @@ class CodeAnalyzer(ast.NodeVisitor):
return node.attr
return None
def process_file(file_path):
"""
Parse the Python file at file_path and return an analysis of its imports and function calls.
"""
try:
with open(file_path, 'r', encoding='utf-8') as f:
with open(file_path, "r", encoding="utf-8") as f:
file_content = f.read()
tree = ast.parse(file_content, filename=file_path)
except Exception as e:
@@ -60,11 +62,14 @@ def process_file(file_path):
analyzer = CodeAnalyzer()
analyzer.visit(tree)
return {
'imports': sorted(analyzer.imports),
'from_imports': {module: sorted(names) for module, names in analyzer.from_imports.items()},
'function_calls': sorted(analyzer.function_calls)
"imports": sorted(analyzer.imports),
"from_imports": {
module: sorted(names) for module, names in analyzer.from_imports.items()
},
"function_calls": sorted(analyzer.function_calls),
}
def process_directory(root_dir):
"""
Walk recursively through root_dir, process each .py file, and collect analysis data.
@@ -79,26 +84,28 @@ def process_directory(root_dir):
summary[file_path] = analysis
return summary
def write_summary(summary, output_file):
"""
Write the collected analysis to a text file in a human-readable format.
"""
with open(output_file, 'w', encoding='utf-8') as f:
with open(output_file, "w", encoding="utf-8") as f:
for file, data in summary.items():
f.write(f"File: {file}\n")
f.write(" Imports:\n")
for imp in data['imports']:
for imp in data["imports"]:
f.write(f" - {imp}\n")
f.write(" From Imports:\n")
for module, names in data['from_imports'].items():
for module, names in data["from_imports"].items():
f.write(f" - {module}: {', '.join(names)}\n")
f.write(" Function Calls:\n")
for call in data['function_calls']:
for call in data["function_calls"]:
f.write(f" - {call}\n")
f.write("\n")
print(f"Analysis written to {output_file}")
if __name__ == '__main__':
if __name__ == "__main__":
project_root = os.getcwd() # Assumes the script is placed at your project root.
analysis_summary = process_directory(project_root)
output_summary_file = "temp_tools/used_dependencies.txt"
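To illustrate what CodeAnalyzer collects, a minimal sketch using the same ast machinery on a hypothetical snippet:

import ast

source = "import os\nfrom infer.modules.vc.modules import VC\nvc = VC(None)"
for node in ast.walk(ast.parse(source)):
    if isinstance(node, ast.Import):
        print("import:", [alias.name for alias in node.names])  # -> ['os']
    elif isinstance(node, ast.ImportFrom):
        print("from:", node.module, [alias.name for alias in node.names])
        # -> from: infer.modules.vc.modules ['VC']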

temp_tools/clean_i18n.py

@@ -1,13 +1,14 @@
import os
import re
def process_file(file_path):
"""
Reads a file, replaces occurrences of i18n("Any text") (even if multiline)
with just the "Any text" literal, and writes the file back if changes are made.
"""
try:
with open(file_path, 'r', encoding='utf-8') as f:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
except UnicodeDecodeError:
print(f"Skipping binary or non-text file: {file_path}")
@@ -17,18 +18,20 @@ def process_file(file_path):
# It supports both single and double quoted strings, and uses DOTALL
# so that the string literal may span multiple lines.
pattern = re.compile(
r'i18n\s*\(\s*(?P<quote>["\'])(?P<text>.*?)(?P=quote)\s*\)',
re.DOTALL
r'i18n\s*\(\s*(?P<quote>["\'])(?P<text>.*?)(?P=quote)\s*\)', re.DOTALL
)
# Replacement function returns only the string literal (including quotes).
new_content = pattern.sub(lambda m: m.group("quote") + m.group("text") + m.group("quote"), content)
new_content = pattern.sub(
lambda m: m.group("quote") + m.group("text") + m.group("quote"), content
)
if new_content != content:
with open(file_path, 'w', encoding='utf-8') as f:
with open(file_path, "w", encoding="utf-8") as f:
f.write(new_content)
print(f"Updated: {file_path}")
def process_directory(root_dir):
"""
Recursively traverse all files in root_dir and process them.
@ -38,7 +41,8 @@ def process_directory(root_dir):
file_path = os.path.join(dirpath, filename)
process_file(file_path)
if __name__ == '__main__':
if __name__ == "__main__":
# Start processing from the current working directory.
project_root = os.getcwd()
process_directory(project_root)
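For illustration, the pattern above behaves like this on an invented sample line:

import re

pattern = re.compile(
    r'i18n\s*\(\s*(?P<quote>["\'])(?P<text>.*?)(?P=quote)\s*\)', re.DOTALL
)
sample = 'btn = gr.Button(i18n("Convert"), variant="primary")'
result = pattern.sub(
    lambda m: m.group("quote") + m.group("text") + m.group("quote"), sample
)
print(result)  # btn = gr.Button("Convert", variant="primary")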

88
temp_tools/join.py Normal file
View File

@ -0,0 +1,88 @@
#!/usr/bin/env python3
import subprocess
import os
import shutil
import stat  # needed by the rmtree onerror handler in remove_git_directory
def clone_spark_repo(repo_url, clone_dir):
"""Clone the spark repo into a temporary directory."""
print(f"Cloning {repo_url} into {clone_dir} ...")
subprocess.run(["git", "clone", repo_url, clone_dir], check=True)
print("Clone completed.")
def remove_git_directory(clone_dir):
"""Remove the .git directory from the cloned repo, adjusting permissions if necessary."""
git_dir = os.path.join(clone_dir, ".git")
if os.path.exists(git_dir):
def handle_remove_error(func, path, exc_info):
# Change the file to writable and try again.
os.chmod(path, stat.S_IWRITE)
func(path)
shutil.rmtree(git_dir, onerror=handle_remove_error)
print(f"Removed .git directory from {clone_dir}")
def ensure_folder(folder_path):
"""Ensure that a folder exists."""
if not os.path.exists(folder_path):
os.makedirs(folder_path)
print(f"Created folder: {folder_path}")
def merge_spark_repo(clone_dir, spark_folder, temp_folder, duplicate_files):
"""Merge files from the cloned spark repo.
- Files whose names are in duplicate_files and already exist in the root are moved to temp_folder.
- All other items are moved to the spark_folder.
"""
for item in os.listdir(clone_dir):
source_path = os.path.join(clone_dir, item)
# Determine the destination:
root_dest = os.path.join(os.getcwd(), item)
if item in duplicate_files and os.path.exists(root_dest):
# Move duplicate file to TEMP folder
dest_path = os.path.join(temp_folder, item)
print(f"Duplicate '{item}' exists in root; moving it to {temp_folder}.")
else:
# Otherwise, place in the spark folder
dest_path = os.path.join(spark_folder, item)
print(f"Moving '{item}' to {spark_folder}.")
shutil.move(source_path, dest_path)
def main():
# Settings
repo_url = "https://github.com/VSlobolinskyi/spark-tts-poetry.git"
clone_dir = "spark_repo_temp" # temporary folder for the cloned spark repo
spark_folder = "spark" # destination folder for spark-specific instruments
temp_folder = "TEMP" # destination folder for duplicate files
# List of duplicate filenames that, if found in the root, will be moved to TEMP
duplicate_files = {"pyproject.toml", ".gitignore", "LICENSE", "README.md"}
# Clone the Spark repo
clone_spark_repo(repo_url, clone_dir)
# Remove the .git directory from the cloned repo
remove_git_directory(clone_dir)
# Ensure destination folders exist
ensure_folder(spark_folder)
ensure_folder(temp_folder)
# Merge files from the cloned spark repo into the correct locations
merge_spark_repo(clone_dir, spark_folder, temp_folder, duplicate_files)
# Remove the temporary clone directory if it still exists (it should be empty now)
if os.path.exists(clone_dir):
shutil.rmtree(clone_dir)
print(f"Removed temporary directory: {clone_dir}")
print("Spark repo merged successfully into the RVC project.")
if __name__ == "__main__":
main()
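A note on the onerror handler in remove_git_directory: on Windows, files under .git are typically read-only, so a bare shutil.rmtree raises PermissionError; clearing the read-only bit and retrying is the standard workaround (and the reason the script needs import stat). A minimal standalone sketch:

import os
import shutil
import stat

def force_remove(func, path, exc_info):
    # Clear the read-only bit and retry the failed operation.
    os.chmod(path, stat.S_IWRITE)
    func(path)

# Usage sketch: shutil.rmtree("some_repo/.git", onerror=force_remove)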

View File

@ -1,23 +1,25 @@
import os
import json
def load_mapping(mapping_path):
"""
Load the mapping JSON file containing key-value pairs.
"""
with open(mapping_path, 'r', encoding='utf-8') as f:
with open(mapping_path, "r", encoding="utf-8") as f:
mapping = json.load(f)
# Sort mapping items by length of the original text (key) in descending order
sorted_mapping = sorted(mapping.items(), key=lambda kv: len(kv[0]), reverse=True)
return sorted_mapping
def process_file(file_path, sorted_mapping):
"""
Read a file, replace occurrences of each key with its value, and write the file back
if changes were made.
"""
try:
with open(file_path, 'r', encoding='utf-8') as f:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
except UnicodeDecodeError:
# Skip binary files or files that can't be decoded as UTF-8
@ -31,10 +33,11 @@ def process_file(file_path, sorted_mapping):
new_content = new_content.replace(original_text, translated_text)
if new_content != content:
with open(file_path, 'w', encoding='utf-8') as f:
with open(file_path, "w", encoding="utf-8") as f:
f.write(new_content)
print(f"Updated: {file_path}")
def process_directory(root_dir, sorted_mapping, mapping_file_path):
"""
Walk through all directories starting at root_dir and process each file.
@ -47,10 +50,11 @@ def process_directory(root_dir, sorted_mapping, mapping_file_path):
continue
process_file(file_path, sorted_mapping)
if __name__ == '__main__':
if __name__ == "__main__":
# Assume the script is run from the project root.
project_root = os.getcwd()
mapping_file_path = os.path.join(project_root, 'i18n', 'locale', 'en_US.json')
mapping_file_path = os.path.join(project_root, "i18n", "locale", "en_US.json")
if not os.path.exists(mapping_file_path):
print(f"Mapping file not found: {mapping_file_path}")

View File

@ -1,6 +1,7 @@
import os
import subprocess
def process_file(file_path):
"""
Run autoflake on the file to remove unused imports in-place.
@ -10,12 +11,13 @@ def process_file(file_path):
# --remove-all-unused-imports removes unused imports.
subprocess.run(
["autoflake", "--in-place", "--remove-all-unused-imports", file_path],
check=True
check=True,
)
print(f"Processed: {file_path}")
except subprocess.CalledProcessError as e:
print(f"Error processing {file_path}: {e}")
def process_directory(root_dir):
"""
Walk recursively through root_dir and process all .py files.
@ -26,7 +28,8 @@ def process_directory(root_dir):
file_path = os.path.join(dirpath, filename)
process_file(file_path)
if __name__ == '__main__':
if __name__ == "__main__":
# Start from the current directory (you can change this to your project root)
project_root = os.getcwd()
process_directory(project_root)
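Since the script shells out to autoflake, it assumes the tool is installed and on PATH (pip install autoflake). A hedged guard one could place before the directory walk:

import shutil
import sys

if shutil.which("autoflake") is None:
    sys.exit("autoflake not found; install it with 'pip install autoflake'.")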

View File

@ -59,7 +59,8 @@ with app:
)
vc_input3 = gr.Audio(label="上传音频长度小于90秒")
vc_transform0 = gr.Number(
label="Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):", value=0
label="Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):",
value=0,
)
f0method0 = gr.Radio(
label="选择音高Extract算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU",
@ -122,7 +123,9 @@ with app:
)
but0 = gr.Button("Convert", variant="primary")
vc_output1 = gr.Textbox(label="Output information")
vc_output2 = gr.Audio(label="Export audio (click on the three dots in the lower right corner to download)")
vc_output2 = gr.Audio(
label="Export audio (click on the three dots in the lower right corner to download)"
)
but0.click(
vc.vc_single,
[

View File

@ -1,11 +1,52 @@
#!/usr/bin/env python3
import os
from pathlib import Path
import subprocess
import sys
import shutil
import stat
import requests
from pathlib import Path
RVC_DOWNLOAD_LINK = "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/"
###########################
# Part 1: Clone Spark TTS #
###########################
BASE_DIR = Path(__file__).resolve().parent.parent
def run_command(command, error_message):
try:
subprocess.run(command, check=True)
except subprocess.CalledProcessError:
print(error_message)
sys.exit(1)
def clone_spark_tts():
# Create the directory spark/pretrained_models if it doesn't exist.
spark_pretrained_dir = os.path.join("spark", "pretrained_models")
os.makedirs(spark_pretrained_dir, exist_ok=True)
print("Running 'git lfs install'...")
run_command(
["git", "lfs", "install"],
"Error: Failed to run 'git lfs install'. Make sure git-lfs is installed (https://git-lfs.com).",
)
clone_dir = os.path.join(spark_pretrained_dir, "Spark-TTS-0.5B")
if not os.path.exists(clone_dir):
print(f"Cloning Spark TTS repository into {clone_dir}...")
run_command(
[
"git",
"clone",
"https://huggingface.co/SparkAudio/Spark-TTS-0.5B",
clone_dir,
],
"Error: Failed to clone the Spark TTS repository.",
)
else:
print(f"Directory '{clone_dir}' already exists. Skipping clone.")
#############################
# Part 2: Download RVC Assets #
#############################
def dl_model(link, model_name, dir_name):
with requests.get(f"{link}{model_name}") as r:
@ -15,59 +56,56 @@ def dl_model(link, model_name, dir_name):
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
def download_rvc_models():
RVC_DOWNLOAD_LINK = "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/"
# Set BASE_DIR to the project root. If this script is in ./tools, we go one level up.
BASE_DIR = Path(__file__).resolve().parent.parent
def check_and_dl(link, model_name, dest_dir):
dest_file = dest_dir / model_name
if dest_file.exists():
print(f"{model_name} already exists at {dest_file}. Skipping download.")
else:
print(f"Downloading {model_name}...")
dl_model(link, model_name, dest_dir)
if __name__ == "__main__":
print("Downloading hubert_base.pt...")
dl_model(RVC_DOWNLOAD_LINK, "hubert_base.pt", BASE_DIR / "assets/hubert")
check_and_dl(RVC_DOWNLOAD_LINK, "hubert_base.pt", BASE_DIR / "assets" / "hubert")
print("Downloading rmvpe.pt...")
dl_model(RVC_DOWNLOAD_LINK, "rmvpe.pt", BASE_DIR / "assets/rmvpe")
check_and_dl(RVC_DOWNLOAD_LINK, "rmvpe.pt", BASE_DIR / "assets" / "rmvpe")
print("Downloading rmvpe.onnx...")
dl_model(RVC_DOWNLOAD_LINK, "rmvpe.onnx", BASE_DIR / "assets/rmvpe")
check_and_dl(RVC_DOWNLOAD_LINK, "rmvpe.onnx", BASE_DIR / "assets" / "rmvpe")
print("Downloading vocals.onnx...")
dl_model(
RVC_DOWNLOAD_LINK + "uvr5_weights/onnx_dereverb_By_FoxJoy/",
"vocals.onnx",
BASE_DIR / "assets/uvr5_weights/onnx_dereverb_By_FoxJoy",
)
dl_model(RVC_DOWNLOAD_LINK, "ffprobe.exe", BASE_DIR / "./")
vocals_dir = BASE_DIR / "assets" / "uvr5_weights" / "onnx_dereverb_By_FoxJoy"
check_and_dl(RVC_DOWNLOAD_LINK + "uvr5_weights/onnx_dereverb_By_FoxJoy/", "vocals.onnx", vocals_dir)
print("Downloading ffprobe.exe...")
dl_model(RVC_DOWNLOAD_LINK, "ffmpeg.exe", BASE_DIR / "./")
check_and_dl(RVC_DOWNLOAD_LINK, "ffprobe.exe", BASE_DIR / ".")
print("Downloading ffmpeg.exe...")
check_and_dl(RVC_DOWNLOAD_LINK, "ffmpeg.exe", BASE_DIR / ".")
rvc_models_dir = BASE_DIR / "assets/pretrained"
rvc_models_dir = BASE_DIR / "assets" / "pretrained"
print("Downloading pretrained models:")
model_names = [
"D32k.pth",
"D40k.pth",
"D48k.pth",
"G32k.pth",
"G40k.pth",
"G48k.pth",
"f0D32k.pth",
"f0D40k.pth",
"f0D48k.pth",
"f0G32k.pth",
"f0G40k.pth",
"f0G48k.pth",
"D32k.pth", "D40k.pth", "D48k.pth",
"G32k.pth", "G40k.pth", "G48k.pth",
"f0D32k.pth", "f0D40k.pth", "f0D48k.pth",
"f0G32k.pth", "f0G40k.pth", "f0G48k.pth",
]
for model in model_names:
print(f"Downloading {model}...")
dl_model(RVC_DOWNLOAD_LINK + "pretrained/", model, rvc_models_dir)
rvc_models_dir = BASE_DIR / "assets/pretrained_v2"
check_and_dl(RVC_DOWNLOAD_LINK + "pretrained/", model, rvc_models_dir)
rvc_models_dir = BASE_DIR / "assets" / "pretrained_v2"
print("Downloading pretrained models v2:")
for model in model_names:
print(f"Downloading {model}...")
dl_model(RVC_DOWNLOAD_LINK + "pretrained_v2/", model, rvc_models_dir)
check_and_dl(RVC_DOWNLOAD_LINK + "pretrained_v2/", model, rvc_models_dir)
print("Downloading uvr5_weights:")
rvc_models_dir = BASE_DIR / "assets/uvr5_weights"
rvc_models_dir = BASE_DIR / "assets" / "uvr5_weights"
model_names = [
"HP2-%E4%BA%BA%E5%A3%B0vocals%2B%E9%9D%9E%E4%BA%BA%E5%A3%B0instrumentals.pth",
"HP2_all_vocals.pth",
@ -79,7 +117,17 @@ if __name__ == "__main__":
"VR-DeEchoNormal.pth",
]
for model in model_names:
print(f"Downloading {model}...")
dl_model(RVC_DOWNLOAD_LINK + "uvr5_weights/", model, rvc_models_dir)
check_and_dl(RVC_DOWNLOAD_LINK + "uvr5_weights/", model, rvc_models_dir)
print("All models downloaded!")
##########################
# Main: Run both parts #
##########################
def main():
clone_spark_tts()
download_rvc_models()
if __name__ == "__main__":
main()
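For reference, the streaming helper that check_and_dl wraps follows the usual requests pattern; a self-contained sketch (stream=True and raise_for_status are assumptions added here, not part of the diff):

from pathlib import Path
import requests

def dl_model(link, model_name, dir_name):
    with requests.get(f"{link}{model_name}", stream=True) as r:
        r.raise_for_status()  # assumption: fail loudly on HTTP errors
        Path(dir_name).mkdir(parents=True, exist_ok=True)
        with open(Path(dir_name) / model_name, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)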

View File

@ -5,6 +5,7 @@ from pathlib import Path
import tempfile
import shutil
def download_file(url: str, local_path: Path):
"""Download a file from a URL to a local path."""
if local_path.exists():
@ -19,6 +20,7 @@ def download_file(url: str, local_path: Path):
f.write(chunk)
print(f"[DONE] Downloaded {local_path}")
def extract_and_move(zip_path: Path, logs_dir: Path, weights_dir: Path):
"""Extract a zip file and move .index files to logs and .pth files to assets/weights."""
logs_dir.mkdir(parents=True, exist_ok=True)
@ -41,6 +43,7 @@ def extract_and_move(zip_path: Path, logs_dir: Path, weights_dir: Path):
shutil.move(str(extracted_file), str(dest))
print("[CLEANUP] Extraction complete.")
def main():
parser = argparse.ArgumentParser(
description="Download a model zip file, extract it, and place .index files in ./logs and .pth files in ./assets/weights."
@ -68,5 +71,6 @@ def main():
zip_path.unlink()
print("[COMPLETE] Model download and extraction complete.")
if __name__ == "__main__":
main()
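The extract-and-route step described in the docstrings above boils down to unzipping and sorting files by suffix; a hedged minimal sketch of that logic (function and variable names are assumptions):

import shutil
import zipfile
from pathlib import Path

def route_extracted(zip_path: Path, logs_dir: Path, weights_dir: Path):
    logs_dir.mkdir(parents=True, exist_ok=True)
    weights_dir.mkdir(parents=True, exist_ok=True)
    extract_dir = zip_path.with_suffix("")
    with zipfile.ZipFile(zip_path) as zf:
        zf.extractall(extract_dir)
    # Route index files to ./logs and model weights to ./assets/weights.
    for f in extract_dir.rglob("*"):
        if f.suffix == ".index":
            shutil.move(str(f), str(logs_dir / f.name))
        elif f.suffix == ".pth":
            shutil.move(str(f), str(weights_dir / f.name))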

View File

@ -1,4 +1,3 @@
import torch
# a=torch.load(r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-suc\G_1000.pth")["model"]#sim_nsf#

View File

@ -9,4 +9,3 @@ torchgate imports all the functions from PyTorch, and in addition provides:
TorchGating --- A PyTorch module that applies a spectral gate to an input signal
"""