chore(format): run black on dev

This commit is contained in:
github-actions[bot] 2024-06-02 13:48:14 +00:00
parent b9ad0258ae
commit d414b9c8aa
6 changed files with 143 additions and 71 deletions

View File

@ -847,10 +847,7 @@ with gr.Blocks(title="RVC WebUI") as app:
value=0, value=0,
) )
input_audio0 = gr.File( input_audio0 = gr.File(
label=i18n( label=i18n("待处理音频文件"), file_types=["audio"]
"待处理音频文件"
),
file_types=["audio"]
) )
file_index2 = gr.Dropdown( file_index2 = gr.Dropdown(
label=i18n("自动检测index路径,下拉式选择(dropdown)"), label=i18n("自动检测index路径,下拉式选择(dropdown)"),
@ -937,28 +934,28 @@ with gr.Blocks(title="RVC WebUI") as app:
api_name="infer_refresh", api_name="infer_refresh",
) )
with gr.Group(): with gr.Group():
vc_output1 = gr.Textbox(label=i18n("输出信息")) vc_output1 = gr.Textbox(label=i18n("输出信息"))
but0.click( but0.click(
vc.vc_single, vc.vc_single,
[ [
spk_item, spk_item,
input_audio0, input_audio0,
vc_transform0, vc_transform0,
f0_file, f0_file,
f0method0, f0method0,
file_index1, file_index1,
file_index2, file_index2,
# file_big_npy1, # file_big_npy1,
index_rate1, index_rate1,
filter_radius0, filter_radius0,
resample_sr0, resample_sr0,
rms_mix_rate0, rms_mix_rate0,
protect0, protect0,
], ],
[vc_output1, vc_output2], [vc_output1, vc_output2],
api_name="infer_convert", api_name="infer_convert",
) )
with gr.TabItem(i18n("批量推理")): with gr.TabItem(i18n("批量推理")):
gr.Markdown( gr.Markdown(
value=i18n( value=i18n(
@ -990,9 +987,7 @@ with gr.Blocks(title="RVC WebUI") as app:
interactive=True, interactive=True,
) )
file_index3 = gr.File( file_index3 = gr.File(
label=i18n( label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
"特征检索库文件路径,为空则使用下拉的选择结果"
),
) )
refresh_button.click( refresh_button.click(
@ -1099,7 +1094,14 @@ with gr.Blocks(title="RVC WebUI") as app:
sid0.change( sid0.change(
fn=vc.get_vc, fn=vc.get_vc,
inputs=[sid0, protect0, protect1], inputs=[sid0, protect0, protect1],
outputs=[spk_item, protect0, protect1, file_index2, file_index4, modelinfo], outputs=[
spk_item,
protect0,
protect1,
file_index2,
file_index4,
modelinfo,
],
api_name="infer_change_voice", api_name="infer_change_voice",
) )
with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")): with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):

View File

@ -10,6 +10,7 @@ from infer.modules.vc import model_hash_ckpt, hash_id
i18n = I18nAuto() i18n = I18nAuto()
# add author sign # add author sign
def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps): def save_small_model(ckpt, sr, if_f0, name, epoch, version, hps):
try: try:

View File

@ -7,6 +7,7 @@ from pybase16384 import encode_to_string, decode_from_string
if __name__ == "__main__": if __name__ == "__main__":
import os, sys import os, sys
now_dir = os.getcwd() now_dir = os.getcwd()
sys.path.append(now_dir) sys.path.append(now_dir)
@ -17,6 +18,7 @@ from .utils import load_hubert
from infer.lib.audio import load_audio from infer.lib.audio import load_audio
class TorchSeedContext: class TorchSeedContext:
def __init__(self, seed): def __init__(self, seed):
self.seed = seed self.seed = seed
@ -29,27 +31,38 @@ class TorchSeedContext:
def __exit__(self, type, value, traceback): def __exit__(self, type, value, traceback):
torch.random.set_rng_state(self.state) torch.random.set_rng_state(self.state)
# Sizing constant for the wave hash: the hash array holds
# ``half_hash_len // 2 * 2`` big-endian int16 values, and a decoded hash is
# required to be ``half_hash_len * 2`` bytes long.
half_hash_len = 512
# Divisor inside the exponentials used by ``_cut_u16`` (soft limiting) and
# ``hash_similarity`` (distance term); larger values flatten both curves.
expand_factor = 65536 * 8
@singleton_variable
def original_audio_time_minus():
    """Return the negated, peak-normalised reference waveform.

    The reference clip ``lgdsng.mp3`` is read from this module's directory
    with ``load_audio(..., 16000)`` (16000 is presumably the sample rate --
    confirm against ``load_audio``), divided in place by its largest absolute
    sample, and returned with its sign flipped.
    """
    reference_path = pathlib.Path(__file__).parent / "lgdsng.mp3"
    samples = load_audio(str(reference_path), 16000)
    peak = np.abs(samples).max()
    # Write the quotient back into the same buffer instead of allocating.
    np.divide(samples, peak, out=samples)
    return -samples
@singleton_variable
def original_audio_freq_minus():
    """Return the negated FFT of the peak-normalised reference waveform.

    The reference clip ``lgdsng.mp3`` is read from this module's directory
    with ``load_audio(..., 16000)``, divided in place by its largest absolute
    sample, transformed with ``fft``, and the spectrum is returned negated.
    """
    reference_path = pathlib.Path(__file__).parent / "lgdsng.mp3"
    samples = load_audio(str(reference_path), 16000)
    peak = np.abs(samples).max()
    # Normalise in place before moving to the frequency domain.
    np.divide(samples, peak, out=samples)
    return -fft(samples)
def _cut_u16(n): def _cut_u16(n):
if n > 16384: n = 16384 + 16384*(1-np.exp((16384-n)/expand_factor)) if n > 16384:
elif n < -16384: n = -16384 - 16384*(1-np.exp((n+16384)/expand_factor)) n = 16384 + 16384 * (1 - np.exp((16384 - n) / expand_factor))
elif n < -16384:
n = -16384 - 16384 * (1 - np.exp((n + 16384) / expand_factor))
return n return n
# wave_hash will change time_field, use carefully # wave_hash will change time_field, use carefully
def wave_hash(time_field): def wave_hash(time_field):
np.divide(time_field, np.abs(time_field).max(), time_field) np.divide(time_field, np.abs(time_field).max(), time_field)
@ -60,35 +73,56 @@ def wave_hash(time_field):
raise Exception("freq not hashable") raise Exception("freq not hashable")
np.add(time_field, original_audio_time_minus(), out=time_field) np.add(time_field, original_audio_time_minus(), out=time_field)
np.add(freq_field, original_audio_freq_minus(), out=freq_field) np.add(freq_field, original_audio_freq_minus(), out=freq_field)
hash = np.zeros(half_hash_len//2*2, dtype='>i2') hash = np.zeros(half_hash_len // 2 * 2, dtype=">i2")
d = 375 * 512 // half_hash_len d = 375 * 512 // half_hash_len
for i in range(half_hash_len//4): for i in range(half_hash_len // 4):
a = i*2 a = i * 2
b = a+1 b = a + 1
x = a + half_hash_len//2 x = a + half_hash_len // 2
y = x+1 y = x + 1
s = np.average(freq_field[i*d:(i+1)*d]) s = np.average(freq_field[i * d : (i + 1) * d])
hash[a] = np.int16(_cut_u16(round(32768*np.real(s)))) hash[a] = np.int16(_cut_u16(round(32768 * np.real(s))))
hash[b] = np.int16(_cut_u16(round(32768*np.imag(s)))) hash[b] = np.int16(_cut_u16(round(32768 * np.imag(s))))
hash[x] = np.int16(_cut_u16(round(32768*np.sum(time_field[i*d:i*d+d//2])))) hash[x] = np.int16(
hash[y] = np.int16(_cut_u16(round(32768*np.sum(time_field[i*d+d//2:(i+1)*d])))) _cut_u16(round(32768 * np.sum(time_field[i * d : i * d + d // 2])))
)
hash[y] = np.int16(
_cut_u16(round(32768 * np.sum(time_field[i * d + d // 2 : (i + 1) * d])))
)
return encode_to_string(hash.tobytes()) return encode_to_string(hash.tobytes())
def audio_hash(file):
    """Return the wave hash of the audio stored in *file*.

    The audio is read with ``load_audio(file, 16000)`` and handed to
    ``wave_hash``.  ``wave_hash`` modifies its argument in place, which is
    harmless here because the loaded array is not used afterwards.
    """
    waveform = load_audio(file, 16000)
    return wave_hash(waveform)
def model_hash(config, tgt_sr, net_g, if_f0, version): def model_hash(config, tgt_sr, net_g, if_f0, version):
pipeline = Pipeline(tgt_sr, config) pipeline = Pipeline(tgt_sr, config)
audio = load_audio(str(pathlib.Path(__file__).parent / "lgdsng.mp3"), 16000) audio = load_audio(str(pathlib.Path(__file__).parent / "lgdsng.mp3"), 16000)
audio_max = np.abs(audio).max() / 0.95 audio_max = np.abs(audio).max() / 0.95
if audio_max > 1: if audio_max > 1:
np.divide(audio, audio_max, audio) np.divide(audio, audio_max, audio)
audio_opt = pipeline.pipeline(load_hubert(config.device, config.is_half), net_g, 0, audio, audio_opt = pipeline.pipeline(
[0, 0, 0], 6, "rmvpe", "", 0, if_f0, 3, tgt_sr, 16000, 0.25, load_hubert(config.device, config.is_half),
version, 0.33) net_g,
0,
audio,
[0, 0, 0],
6,
"rmvpe",
"",
0,
if_f0,
3,
tgt_sr,
16000,
0.25,
version,
0.33,
)
opt_len = len(audio_opt) opt_len = len(audio_opt)
diff = 48000 - opt_len diff = 48000 - opt_len
n = diff//2 n = diff // 2
if n > 0: if n > 0:
audio_opt = np.pad(audio_opt, (n, n)) audio_opt = np.pad(audio_opt, (n, n))
elif n < 0: elif n < 0:
@ -98,6 +132,7 @@ def model_hash(config, tgt_sr, net_g, if_f0, version):
del pipeline, audio, audio_opt del pipeline, audio, audio_opt
return h return h
def model_hash_ckpt(cpt): def model_hash_ckpt(cpt):
from infer.lib.infer_pack.models import ( from infer.lib.infer_pack.models import (
SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid,
@ -105,6 +140,7 @@ def model_hash_ckpt(cpt):
SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid,
SynthesizerTrnMs768NSFsid_nono, SynthesizerTrnMs768NSFsid_nono,
) )
config = Config() config = Config()
with TorchSeedContext(114514): with TorchSeedContext(114514):
tgt_sr = cpt["config"][-1] tgt_sr = cpt["config"][-1]
@ -116,9 +152,9 @@ def model_hash_ckpt(cpt):
("v2", 1): SynthesizerTrnMs768NSFsid, ("v2", 1): SynthesizerTrnMs768NSFsid,
("v2", 0): SynthesizerTrnMs768NSFsid_nono, ("v2", 0): SynthesizerTrnMs768NSFsid_nono,
} }
net_g = synthesizer_class.get( net_g = synthesizer_class.get((version, if_f0), SynthesizerTrnMs256NSFsid)(
(version, if_f0), SynthesizerTrnMs256NSFsid *cpt["config"], is_half=config.is_half
)(*cpt["config"], is_half=config.is_half) )
del net_g.enc_q del net_g.enc_q
@ -135,36 +171,47 @@ def model_hash_ckpt(cpt):
return h return h
def model_hash_from(path):
    """Load the checkpoint at *path* on the CPU and return its model hash.

    NOTE(review): ``torch.load`` unpickles the file; only use this on
    checkpoints from a trusted source.
    """
    checkpoint = torch.load(path, map_location="cpu")
    digest = model_hash_ckpt(checkpoint)
    # Drop the local reference to the loaded checkpoint before returning.
    del checkpoint
    return digest
def _extend_difference(n, a, b): def _extend_difference(n, a, b):
if n < a: n = a if n < a:
elif n > b: n = b n = a
elif n > b:
n = b
n -= a n -= a
n /= (b-a) n /= b - a
return n return n
def hash_similarity(h1: str, h2: str) -> float:
    """Score how alike two encoded hashes are.

    Both strings are decoded with ``decode_from_string`` and read as
    big-endian int16 vectors.  The score is the mean of two terms, rounded
    to six decimals:

    * the absolute cosine similarity of the two full vectors (taken as 1.0
      when either vector has zero norm);
    * a distance term built from the leading ``half_hash_len // 4``
      (real, imag) pairs: the summed magnitude of their complex differences
      is passed through ``exp(-d / expand_factor)`` and then stretched from
      ``[0.5, 1.0]`` onto ``[0, 1]`` by ``_extend_difference``.  Pairs where
      either side is exactly zero are skipped.

    Raises:
        ValueError: if either decoded hash is not ``half_hash_len * 2``
            bytes long.
    """
    expected_len = half_hash_len * 2
    h1b, h2b = decode_from_string(h1), decode_from_string(h2)
    if len(h1b) != expected_len or len(h2b) != expected_len:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("invalid hash length")
    h1n = np.frombuffer(h1b, dtype=">i2")
    h2n = np.frombuffer(h2b, dtype=">i2")

    # Accumulate sequentially (not vectorised) so the floating-point sum, and
    # therefore the rounded score, stays identical to earlier results.
    d = 0
    for a in range(0, half_hash_len // 4 * 2, 2):
        ax = complex(h1n[a], h1n[a + 1])
        bx = complex(h2n[a], h2n[a + 1])
        if abs(ax) == 0 or abs(bx) == 0:
            continue
        d += np.abs(ax - bx)

    frac = np.linalg.norm(h1n) * np.linalg.norm(h2n)
    if frac != 0:
        cosine = np.dot(h1n.astype(np.float32), h2n.astype(np.float32)) / frac
    else:
        cosine = 1.0
    distance = _extend_difference(np.exp(-d / expand_factor), 0.5, 1.0)
    return round((abs(cosine) + distance) / 2, 6)
def hash_id(h: str) -> str:
    """Derive a short identifier from an encoded hash.

    The hash is decoded with ``decode_from_string``, digested with MD5, the
    digest is re-encoded with ``encode_to_string``, and the final character
    of that encoding is dropped.
    """
    raw = decode_from_string(h)
    digest = hashlib.md5(raw).digest()
    encoded = encode_to_string(digest)
    return encoded[:-1]

View File

@ -7,6 +7,7 @@ from .hash import model_hash_ckpt, hash_id
i18n = I18nAuto() i18n = I18nAuto()
def show_model_info(cpt, show_long_id=False): def show_model_info(cpt, show_long_id=False):
try: try:
h = model_hash_ckpt(cpt) h = model_hash_ckpt(cpt)
@ -14,10 +15,27 @@ def show_model_info(cpt, show_long_id=False):
idread = cpt.get("id", "None") idread = cpt.get("id", "None")
hread = cpt.get("hash", "None") hread = cpt.get("hash", "None")
if id != idread: if id != idread:
id += "("+i18n("实际计算")+"), "+idread+"("+i18n("从模型中读取")+")" id += (
if not show_long_id: h = i18n("不显示") "("
+ i18n("实际计算")
+ "), "
+ idread
+ "("
+ i18n("从模型中读取")
+ ")"
)
if not show_long_id:
h = i18n("不显示")
elif h != hread: elif h != hread:
h += "("+i18n("实际计算")+"), "+hread+"("+i18n("从模型中读取")+")" h += (
"("
+ i18n("实际计算")
+ "), "
+ hread
+ "("
+ i18n("从模型中读取")
+ ")"
)
txt = f"""{i18n("模型名")}: %s txt = f"""{i18n("模型名")}: %s
{i18n("封装时间")}: %s {i18n("封装时间")}: %s
{i18n("信息")}: %s {i18n("信息")}: %s
@ -32,13 +50,15 @@ def show_model_info(cpt, show_long_id=False):
cpt.get("sr", "None"), cpt.get("sr", "None"),
i18n("") if cpt.get("f0", 0) == 1 else i18n(""), i18n("") if cpt.get("f0", 0) == 1 else i18n(""),
cpt.get("version", "None"), cpt.get("version", "None"),
id, h id,
h,
) )
except: except:
txt = traceback.format_exc() txt = traceback.format_exc()
return txt return txt
def show_info(path): def show_info(path):
try: try:
a = torch.load(path, map_location="cpu") a = torch.load(path, map_location="cpu")

View File

@ -136,7 +136,7 @@ class VC:
to_return_protect1, to_return_protect1,
index, index,
index, index,
show_model_info(self.cpt) show_model_info(self.cpt),
) )
if to_return_protect if to_return_protect
else {"visible": True, "maximum": n_spk, "__type__": "update"} else {"visible": True, "maximum": n_spk, "__type__": "update"}
@ -173,7 +173,8 @@ class VC:
self.hubert_model = load_hubert(self.config.device, self.config.is_half) self.hubert_model = load_hubert(self.config.device, self.config.is_half)
if file_index: if file_index:
if hasattr(file_index, "name"): file_index = str(file_index.name) if hasattr(file_index, "name"):
file_index = str(file_index.name)
file_index = ( file_index = (
file_index.strip(" ") file_index.strip(" ")
.strip('"') .strip('"')

View File

@ -114,6 +114,7 @@ class Pipeline(object):
) )
elif f0_method == "harvest": elif f0_method == "harvest":
from hashlib import md5 from hashlib import md5
f0_cache_key = md5(x.tobytes()).digest() f0_cache_key = md5(x.tobytes()).digest()
input_audio_path2wav[f0_cache_key] = x.astype(np.double) input_audio_path2wav[f0_cache_key] = x.astype(np.double)
f0 = cache_harvest_f0(f0_cache_key, self.sr, f0_max, f0_min, 10) f0 = cache_harvest_f0(f0_cache_key, self.sr, f0_max, f0_min, 10)