mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-05-07 04:09:06 +08:00
chore(format): run black on dev (#2090)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
b9ad0258ae
commit
8767e11cf1
18
infer-web.py
18
infer-web.py
@ -847,10 +847,7 @@ with gr.Blocks(title="RVC WebUI") as app:
|
|||||||
value=0,
|
value=0,
|
||||||
)
|
)
|
||||||
input_audio0 = gr.File(
|
input_audio0 = gr.File(
|
||||||
label=i18n(
|
label=i18n("待处理音频文件"), file_types=["audio"]
|
||||||
"待处理音频文件"
|
|
||||||
),
|
|
||||||
file_types=["audio"]
|
|
||||||
)
|
)
|
||||||
file_index2 = gr.Dropdown(
|
file_index2 = gr.Dropdown(
|
||||||
label=i18n("自动检测index路径,下拉式选择(dropdown)"),
|
label=i18n("自动检测index路径,下拉式选择(dropdown)"),
|
||||||
@ -990,9 +987,7 @@ with gr.Blocks(title="RVC WebUI") as app:
|
|||||||
interactive=True,
|
interactive=True,
|
||||||
)
|
)
|
||||||
file_index3 = gr.File(
|
file_index3 = gr.File(
|
||||||
label=i18n(
|
label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
|
||||||
"特征检索库文件路径,为空则使用下拉的选择结果"
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
refresh_button.click(
|
refresh_button.click(
|
||||||
@ -1099,7 +1094,14 @@ with gr.Blocks(title="RVC WebUI") as app:
|
|||||||
sid0.change(
|
sid0.change(
|
||||||
fn=vc.get_vc,
|
fn=vc.get_vc,
|
||||||
inputs=[sid0, protect0, protect1],
|
inputs=[sid0, protect0, protect1],
|
||||||
outputs=[spk_item, protect0, protect1, file_index2, file_index4, modelinfo],
|
outputs=[
|
||||||
|
spk_item,
|
||||||
|
protect0,
|
||||||
|
protect1,
|
||||||
|
file_index2,
|
||||||
|
file_index4,
|
||||||
|
modelinfo,
|
||||||
|
],
|
||||||
api_name="infer_change_voice",
|
api_name="infer_change_voice",
|
||||||
)
|
)
|
||||||
with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
|
with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
|
||||||
|
@ -10,6 +10,7 @@ from infer.modules.vc import model_hash_ckpt, hash_id
|
|||||||
|
|
||||||
i18n = I18nAuto()
|
i18n = I18nAuto()
|
||||||
|
|
||||||
|
|
||||||
# add author sign
|
# add author sign
|
||||||
def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps):
|
def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps):
|
||||||
try:
|
try:
|
||||||
|
@ -7,6 +7,7 @@ from pybase16384 import encode_to_string, decode_from_string
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import os, sys
|
import os, sys
|
||||||
|
|
||||||
now_dir = os.getcwd()
|
now_dir = os.getcwd()
|
||||||
sys.path.append(now_dir)
|
sys.path.append(now_dir)
|
||||||
|
|
||||||
@ -17,6 +18,7 @@ from .utils import load_hubert
|
|||||||
|
|
||||||
from infer.lib.audio import load_audio
|
from infer.lib.audio import load_audio
|
||||||
|
|
||||||
|
|
||||||
class TorchSeedContext:
|
class TorchSeedContext:
|
||||||
def __init__(self, seed):
|
def __init__(self, seed):
|
||||||
self.seed = seed
|
self.seed = seed
|
||||||
@ -29,27 +31,38 @@ class TorchSeedContext:
|
|||||||
def __exit__(self, type, value, traceback):
|
def __exit__(self, type, value, traceback):
|
||||||
torch.random.set_rng_state(self.state)
|
torch.random.set_rng_state(self.state)
|
||||||
|
|
||||||
|
|
||||||
half_hash_len = 512
|
half_hash_len = 512
|
||||||
expand_factor = 65536*8
|
expand_factor = 65536 * 8
|
||||||
|
|
||||||
|
|
||||||
@singleton_variable
|
@singleton_variable
|
||||||
def original_audio_time_minus():
|
def original_audio_time_minus():
|
||||||
__original_audio = load_audio(str(pathlib.Path(__file__).parent / "lgdsng.mp3"), 16000)
|
__original_audio = load_audio(
|
||||||
|
str(pathlib.Path(__file__).parent / "lgdsng.mp3"), 16000
|
||||||
|
)
|
||||||
np.divide(__original_audio, np.abs(__original_audio).max(), __original_audio)
|
np.divide(__original_audio, np.abs(__original_audio).max(), __original_audio)
|
||||||
return -__original_audio
|
return -__original_audio
|
||||||
|
|
||||||
|
|
||||||
@singleton_variable
|
@singleton_variable
|
||||||
def original_audio_freq_minus():
|
def original_audio_freq_minus():
|
||||||
__original_audio = load_audio(str(pathlib.Path(__file__).parent / "lgdsng.mp3"), 16000)
|
__original_audio = load_audio(
|
||||||
|
str(pathlib.Path(__file__).parent / "lgdsng.mp3"), 16000
|
||||||
|
)
|
||||||
np.divide(__original_audio, np.abs(__original_audio).max(), __original_audio)
|
np.divide(__original_audio, np.abs(__original_audio).max(), __original_audio)
|
||||||
__original_audio = fft(__original_audio)
|
__original_audio = fft(__original_audio)
|
||||||
return -__original_audio
|
return -__original_audio
|
||||||
|
|
||||||
|
|
||||||
def _cut_u16(n):
|
def _cut_u16(n):
|
||||||
if n > 16384: n = 16384 + 16384*(1-np.exp((16384-n)/expand_factor))
|
if n > 16384:
|
||||||
elif n < -16384: n = -16384 - 16384*(1-np.exp((n+16384)/expand_factor))
|
n = 16384 + 16384 * (1 - np.exp((16384 - n) / expand_factor))
|
||||||
|
elif n < -16384:
|
||||||
|
n = -16384 - 16384 * (1 - np.exp((n + 16384) / expand_factor))
|
||||||
return n
|
return n
|
||||||
|
|
||||||
|
|
||||||
# wave_hash will change time_field, use carefully
|
# wave_hash will change time_field, use carefully
|
||||||
def wave_hash(time_field):
|
def wave_hash(time_field):
|
||||||
np.divide(time_field, np.abs(time_field).max(), time_field)
|
np.divide(time_field, np.abs(time_field).max(), time_field)
|
||||||
@ -60,35 +73,56 @@ def wave_hash(time_field):
|
|||||||
raise Exception("freq not hashable")
|
raise Exception("freq not hashable")
|
||||||
np.add(time_field, original_audio_time_minus(), out=time_field)
|
np.add(time_field, original_audio_time_minus(), out=time_field)
|
||||||
np.add(freq_field, original_audio_freq_minus(), out=freq_field)
|
np.add(freq_field, original_audio_freq_minus(), out=freq_field)
|
||||||
hash = np.zeros(half_hash_len//2*2, dtype='>i2')
|
hash = np.zeros(half_hash_len // 2 * 2, dtype=">i2")
|
||||||
d = 375 * 512 // half_hash_len
|
d = 375 * 512 // half_hash_len
|
||||||
for i in range(half_hash_len//4):
|
for i in range(half_hash_len // 4):
|
||||||
a = i*2
|
a = i * 2
|
||||||
b = a+1
|
b = a + 1
|
||||||
x = a + half_hash_len//2
|
x = a + half_hash_len // 2
|
||||||
y = x+1
|
y = x + 1
|
||||||
s = np.average(freq_field[i*d:(i+1)*d])
|
s = np.average(freq_field[i * d : (i + 1) * d])
|
||||||
hash[a] = np.int16(_cut_u16(round(32768*np.real(s))))
|
hash[a] = np.int16(_cut_u16(round(32768 * np.real(s))))
|
||||||
hash[b] = np.int16(_cut_u16(round(32768*np.imag(s))))
|
hash[b] = np.int16(_cut_u16(round(32768 * np.imag(s))))
|
||||||
hash[x] = np.int16(_cut_u16(round(32768*np.sum(time_field[i*d:i*d+d//2]))))
|
hash[x] = np.int16(
|
||||||
hash[y] = np.int16(_cut_u16(round(32768*np.sum(time_field[i*d+d//2:(i+1)*d]))))
|
_cut_u16(round(32768 * np.sum(time_field[i * d : i * d + d // 2])))
|
||||||
|
)
|
||||||
|
hash[y] = np.int16(
|
||||||
|
_cut_u16(round(32768 * np.sum(time_field[i * d + d // 2 : (i + 1) * d])))
|
||||||
|
)
|
||||||
return encode_to_string(hash.tobytes())
|
return encode_to_string(hash.tobytes())
|
||||||
|
|
||||||
|
|
||||||
def audio_hash(file):
|
def audio_hash(file):
|
||||||
return wave_hash(load_audio(file, 16000))
|
return wave_hash(load_audio(file, 16000))
|
||||||
|
|
||||||
|
|
||||||
def model_hash(config, tgt_sr, net_g, if_f0, version):
|
def model_hash(config, tgt_sr, net_g, if_f0, version):
|
||||||
pipeline = Pipeline(tgt_sr, config)
|
pipeline = Pipeline(tgt_sr, config)
|
||||||
audio = load_audio(str(pathlib.Path(__file__).parent / "lgdsng.mp3"), 16000)
|
audio = load_audio(str(pathlib.Path(__file__).parent / "lgdsng.mp3"), 16000)
|
||||||
audio_max = np.abs(audio).max() / 0.95
|
audio_max = np.abs(audio).max() / 0.95
|
||||||
if audio_max > 1:
|
if audio_max > 1:
|
||||||
np.divide(audio, audio_max, audio)
|
np.divide(audio, audio_max, audio)
|
||||||
audio_opt = pipeline.pipeline(load_hubert(config.device, config.is_half), net_g, 0, audio,
|
audio_opt = pipeline.pipeline(
|
||||||
[0, 0, 0], 6, "rmvpe", "", 0, if_f0, 3, tgt_sr, 16000, 0.25,
|
load_hubert(config.device, config.is_half),
|
||||||
version, 0.33)
|
net_g,
|
||||||
|
0,
|
||||||
|
audio,
|
||||||
|
[0, 0, 0],
|
||||||
|
6,
|
||||||
|
"rmvpe",
|
||||||
|
"",
|
||||||
|
0,
|
||||||
|
if_f0,
|
||||||
|
3,
|
||||||
|
tgt_sr,
|
||||||
|
16000,
|
||||||
|
0.25,
|
||||||
|
version,
|
||||||
|
0.33,
|
||||||
|
)
|
||||||
opt_len = len(audio_opt)
|
opt_len = len(audio_opt)
|
||||||
diff = 48000 - opt_len
|
diff = 48000 - opt_len
|
||||||
n = diff//2
|
n = diff // 2
|
||||||
if n > 0:
|
if n > 0:
|
||||||
audio_opt = np.pad(audio_opt, (n, n))
|
audio_opt = np.pad(audio_opt, (n, n))
|
||||||
elif n < 0:
|
elif n < 0:
|
||||||
@ -98,6 +132,7 @@ def model_hash(config, tgt_sr, net_g, if_f0, version):
|
|||||||
del pipeline, audio, audio_opt
|
del pipeline, audio, audio_opt
|
||||||
return h
|
return h
|
||||||
|
|
||||||
|
|
||||||
def model_hash_ckpt(cpt):
|
def model_hash_ckpt(cpt):
|
||||||
from infer.lib.infer_pack.models import (
|
from infer.lib.infer_pack.models import (
|
||||||
SynthesizerTrnMs256NSFsid,
|
SynthesizerTrnMs256NSFsid,
|
||||||
@ -105,6 +140,7 @@ def model_hash_ckpt(cpt):
|
|||||||
SynthesizerTrnMs768NSFsid,
|
SynthesizerTrnMs768NSFsid,
|
||||||
SynthesizerTrnMs768NSFsid_nono,
|
SynthesizerTrnMs768NSFsid_nono,
|
||||||
)
|
)
|
||||||
|
|
||||||
config = Config()
|
config = Config()
|
||||||
with TorchSeedContext(114514):
|
with TorchSeedContext(114514):
|
||||||
tgt_sr = cpt["config"][-1]
|
tgt_sr = cpt["config"][-1]
|
||||||
@ -116,9 +152,9 @@ def model_hash_ckpt(cpt):
|
|||||||
("v2", 1): SynthesizerTrnMs768NSFsid,
|
("v2", 1): SynthesizerTrnMs768NSFsid,
|
||||||
("v2", 0): SynthesizerTrnMs768NSFsid_nono,
|
("v2", 0): SynthesizerTrnMs768NSFsid_nono,
|
||||||
}
|
}
|
||||||
net_g = synthesizer_class.get(
|
net_g = synthesizer_class.get((version, if_f0), SynthesizerTrnMs256NSFsid)(
|
||||||
(version, if_f0), SynthesizerTrnMs256NSFsid
|
*cpt["config"], is_half=config.is_half
|
||||||
)(*cpt["config"], is_half=config.is_half)
|
)
|
||||||
|
|
||||||
del net_g.enc_q
|
del net_g.enc_q
|
||||||
|
|
||||||
@ -135,36 +171,47 @@ def model_hash_ckpt(cpt):
|
|||||||
|
|
||||||
return h
|
return h
|
||||||
|
|
||||||
|
|
||||||
def model_hash_from(path):
|
def model_hash_from(path):
|
||||||
cpt = torch.load(path, map_location="cpu")
|
cpt = torch.load(path, map_location="cpu")
|
||||||
h = model_hash_ckpt(cpt)
|
h = model_hash_ckpt(cpt)
|
||||||
del cpt
|
del cpt
|
||||||
return h
|
return h
|
||||||
|
|
||||||
|
|
||||||
def _extend_difference(n, a, b):
|
def _extend_difference(n, a, b):
|
||||||
if n < a: n = a
|
if n < a:
|
||||||
elif n > b: n = b
|
n = a
|
||||||
|
elif n > b:
|
||||||
|
n = b
|
||||||
n -= a
|
n -= a
|
||||||
n /= (b-a)
|
n /= b - a
|
||||||
return n
|
return n
|
||||||
|
|
||||||
|
|
||||||
def hash_similarity(h1: str, h2: str) -> int:
|
def hash_similarity(h1: str, h2: str) -> int:
|
||||||
h1b, h2b = decode_from_string(h1), decode_from_string(h2)
|
h1b, h2b = decode_from_string(h1), decode_from_string(h2)
|
||||||
if len(h1b) != half_hash_len*2 or len(h2b) != half_hash_len*2:
|
if len(h1b) != half_hash_len * 2 or len(h2b) != half_hash_len * 2:
|
||||||
raise Exception("invalid hash length")
|
raise Exception("invalid hash length")
|
||||||
h1n, h2n = np.frombuffer(h1b, dtype='>i2'), np.frombuffer(h2b, dtype='>i2')
|
h1n, h2n = np.frombuffer(h1b, dtype=">i2"), np.frombuffer(h2b, dtype=">i2")
|
||||||
d = 0
|
d = 0
|
||||||
for i in range(half_hash_len//4):
|
for i in range(half_hash_len // 4):
|
||||||
a = i*2
|
a = i * 2
|
||||||
b = a+1
|
b = a + 1
|
||||||
ax = complex(h1n[a], h1n[b])
|
ax = complex(h1n[a], h1n[b])
|
||||||
bx = complex(h2n[a], h2n[b])
|
bx = complex(h2n[a], h2n[b])
|
||||||
if abs(ax) == 0 or abs(bx) == 0: continue
|
if abs(ax) == 0 or abs(bx) == 0:
|
||||||
|
continue
|
||||||
d += np.abs(ax - bx)
|
d += np.abs(ax - bx)
|
||||||
frac = (np.linalg.norm(h1n) * np.linalg.norm(h2n))
|
frac = np.linalg.norm(h1n) * np.linalg.norm(h2n)
|
||||||
cosine = np.dot(h1n.astype(np.float32), h2n.astype(np.float32)) / frac if frac != 0 else 1.0
|
cosine = (
|
||||||
distance = _extend_difference(np.exp(-d/expand_factor), 0.5, 1.0)
|
np.dot(h1n.astype(np.float32), h2n.astype(np.float32)) / frac
|
||||||
|
if frac != 0
|
||||||
|
else 1.0
|
||||||
|
)
|
||||||
|
distance = _extend_difference(np.exp(-d / expand_factor), 0.5, 1.0)
|
||||||
return round((abs(cosine) + distance) / 2, 6)
|
return round((abs(cosine) + distance) / 2, 6)
|
||||||
|
|
||||||
|
|
||||||
def hash_id(h: str) -> str:
|
def hash_id(h: str) -> str:
|
||||||
return encode_to_string(hashlib.md5(decode_from_string(h)).digest())[:-1]
|
return encode_to_string(hashlib.md5(decode_from_string(h)).digest())[:-1]
|
||||||
|
@ -7,6 +7,7 @@ from .hash import model_hash_ckpt, hash_id
|
|||||||
|
|
||||||
i18n = I18nAuto()
|
i18n = I18nAuto()
|
||||||
|
|
||||||
|
|
||||||
def show_model_info(cpt, show_long_id=False):
|
def show_model_info(cpt, show_long_id=False):
|
||||||
try:
|
try:
|
||||||
h = model_hash_ckpt(cpt)
|
h = model_hash_ckpt(cpt)
|
||||||
@ -14,10 +15,27 @@ def show_model_info(cpt, show_long_id=False):
|
|||||||
idread = cpt.get("id", "None")
|
idread = cpt.get("id", "None")
|
||||||
hread = cpt.get("hash", "None")
|
hread = cpt.get("hash", "None")
|
||||||
if id != idread:
|
if id != idread:
|
||||||
id += "("+i18n("实际计算")+"), "+idread+"("+i18n("从模型中读取")+")"
|
id += (
|
||||||
if not show_long_id: h = i18n("不显示")
|
"("
|
||||||
|
+ i18n("实际计算")
|
||||||
|
+ "), "
|
||||||
|
+ idread
|
||||||
|
+ "("
|
||||||
|
+ i18n("从模型中读取")
|
||||||
|
+ ")"
|
||||||
|
)
|
||||||
|
if not show_long_id:
|
||||||
|
h = i18n("不显示")
|
||||||
elif h != hread:
|
elif h != hread:
|
||||||
h += "("+i18n("实际计算")+"), "+hread+"("+i18n("从模型中读取")+")"
|
h += (
|
||||||
|
"("
|
||||||
|
+ i18n("实际计算")
|
||||||
|
+ "), "
|
||||||
|
+ hread
|
||||||
|
+ "("
|
||||||
|
+ i18n("从模型中读取")
|
||||||
|
+ ")"
|
||||||
|
)
|
||||||
txt = f"""{i18n("模型名")}: %s
|
txt = f"""{i18n("模型名")}: %s
|
||||||
{i18n("封装时间")}: %s
|
{i18n("封装时间")}: %s
|
||||||
{i18n("信息")}: %s
|
{i18n("信息")}: %s
|
||||||
@ -32,13 +50,15 @@ def show_model_info(cpt, show_long_id=False):
|
|||||||
cpt.get("sr", "None"),
|
cpt.get("sr", "None"),
|
||||||
i18n("有") if cpt.get("f0", 0) == 1 else i18n("无"),
|
i18n("有") if cpt.get("f0", 0) == 1 else i18n("无"),
|
||||||
cpt.get("version", "None"),
|
cpt.get("version", "None"),
|
||||||
id, h
|
id,
|
||||||
|
h,
|
||||||
)
|
)
|
||||||
except:
|
except:
|
||||||
txt = traceback.format_exc()
|
txt = traceback.format_exc()
|
||||||
|
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
|
|
||||||
def show_info(path):
|
def show_info(path):
|
||||||
try:
|
try:
|
||||||
a = torch.load(path, map_location="cpu")
|
a = torch.load(path, map_location="cpu")
|
||||||
|
@ -136,7 +136,7 @@ class VC:
|
|||||||
to_return_protect1,
|
to_return_protect1,
|
||||||
index,
|
index,
|
||||||
index,
|
index,
|
||||||
show_model_info(self.cpt)
|
show_model_info(self.cpt),
|
||||||
)
|
)
|
||||||
if to_return_protect
|
if to_return_protect
|
||||||
else {"visible": True, "maximum": n_spk, "__type__": "update"}
|
else {"visible": True, "maximum": n_spk, "__type__": "update"}
|
||||||
@ -173,7 +173,8 @@ class VC:
|
|||||||
self.hubert_model = load_hubert(self.config.device, self.config.is_half)
|
self.hubert_model = load_hubert(self.config.device, self.config.is_half)
|
||||||
|
|
||||||
if file_index:
|
if file_index:
|
||||||
if hasattr(file_index, "name"): file_index = str(file_index.name)
|
if hasattr(file_index, "name"):
|
||||||
|
file_index = str(file_index.name)
|
||||||
file_index = (
|
file_index = (
|
||||||
file_index.strip(" ")
|
file_index.strip(" ")
|
||||||
.strip('"')
|
.strip('"')
|
||||||
|
@ -114,6 +114,7 @@ class Pipeline(object):
|
|||||||
)
|
)
|
||||||
elif f0_method == "harvest":
|
elif f0_method == "harvest":
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
|
|
||||||
f0_cache_key = md5(x.tobytes()).digest()
|
f0_cache_key = md5(x.tobytes()).digest()
|
||||||
input_audio_path2wav[f0_cache_key] = x.astype(np.double)
|
input_audio_path2wav[f0_cache_key] = x.astype(np.double)
|
||||||
f0 = cache_harvest_f0(f0_cache_key, self.sr, f0_max, f0_min, 10)
|
f0 = cache_harvest_f0(f0_cache_key, self.sr, f0_max, f0_min, 10)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user