From e96ba57c9f620fcdcbfebb480ed2270b7063bef6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=CE=9D=CE=B1=CF=81=CE=BF=CF=85=CF=83=CE=AD=C2=B7=CE=BC?=
 =?UTF-8?q?=C2=B7=CE=B3=CE=B9=CE=BF=CF=85=CE=BC=CE=B5=CE=BC=CE=AF=C2=B7?=
 =?UTF-8?q?=CE=A7=CE=B9=CE=BD=CE=B1=CE=BA=CE=AC=CE=BD=CE=BD=CE=B1?=
 <40709280+NaruseMioShirakana@users.noreply.github.com>
Date: Mon, 24 Apr 2023 19:40:39 +0800
Subject: [PATCH] Add files via upload

---
 export_onnx.py     | 118 +++++++++++++++++++++++++++++----------------
 export_onnx_old.py |  47 ++++++++++++++++++
 2 files changed, 123 insertions(+), 42 deletions(-)
 create mode 100644 export_onnx_old.py

diff --git a/export_onnx.py b/export_onnx.py
index d4a8c62..8b62b47 100644
--- a/export_onnx.py
+++ b/export_onnx.py
@@ -1,47 +1,81 @@
-from infer_pack.models_onnx import SynthesizerTrnMs256NSFsid
+from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
+from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
 import torch
 
-person = "Shiroha/shiroha.pth"
-exported_path = "model.onnx"
+if __name__ == '__main__':
+    MoeVS = True  # whether the model is exported for MoeVoiceStudio (formerly MoeSS)
+    ModelPath = "Shiroha/shiroha.pth"  # model path
+    ExportedPath = "model.onnx"  # output path
+    hidden_channels = 256  # hidden_channels, in preparation for 768Vec
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+    print(*cpt["config"])
 
 
-cpt = torch.load(person, map_location="cpu")
-cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
-print(*cpt["config"])
-net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=False)
-net_g.load_state_dict(cpt["weight"], strict=False)
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit length (seems to be unused)
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # fundamental frequency (in Hz)
+    test_pitchf = torch.rand(1, 200)  # NSF fundamental frequency
+    test_ds = torch.LongTensor([0])  # speaker ID
+    test_rnd = torch.rand(1, 192, 200)  # noise (adds a random factor)
 
-test_phone = torch.rand(1, 200, 256)
-test_phone_lengths = torch.tensor([200]).long()
-test_pitch = torch.randint(size=(1, 200), low=5, high=255)
-test_pitchf = torch.rand(1, 200)
-test_ds = torch.LongTensor([0])
-test_rnd = torch.rand(1, 192, 200)
-input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
-output_names = [
-    "audio",
-]
-device = "cpu"
-torch.onnx.export(
-    net_g,
-    (
-        test_phone.to(device),
-        test_phone_lengths.to(device),
-        test_pitch.to(device),
-        test_pitchf.to(device),
-        test_ds.to(device),
-        test_rnd.to(device),
-    ),
-    exported_path,
-    dynamic_axes={
-        "phone": [1],
-        "pitch": [1],
-        "pitchf": [1],
-        "rnd": [2],
-    },
-    do_constant_folding=False,
-    opset_version=16,
-    verbose=False,
-    input_names=input_names,
-    output_names=output_names,
-)
+    device = "cpu"  # device used during export (does not affect how the exported model is used)
+
+    if MoeVS:
+        net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False)  # fp32 export (supporting fp16 in C++ requires manually rearranging memory, so fp16 is not used for now)
+        net_g.load_state_dict(cpt["weight"], strict=False)
+        input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
+        output_names = [
+            "audio",
+        ]
+        torch.onnx.export(
+            net_g,
+            (
+                test_phone.to(device),
+                test_phone_lengths.to(device),
+                test_pitch.to(device),
+                test_pitchf.to(device),
+                test_ds.to(device),
+                test_rnd.to(device),
+            ),
+            ExportedPath,
+            dynamic_axes={
+                "phone": [1],
+                "pitch": [1],
+                "pitchf": [1],
+                "rnd": [2],
+            },
+            do_constant_folding=False,
+            opset_version=16,
+            verbose=False,
+            input_names=input_names,
+            output_names=output_names,
+        )
+    else:
+        net_g = SynthesizerTrnMs256NSFsidO(*cpt["config"], is_half=False)  # fp32 export (supporting fp16 in C++ requires manually rearranging memory, so fp16 is not used for now)
+        net_g.load_state_dict(cpt["weight"], strict=False)
+        input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
+        output_names = [
+            "audio",
+        ]
+        torch.onnx.export(
+            net_g,
+            (
+                test_phone.to(device),
+                test_phone_lengths.to(device),
+                test_pitch.to(device),
+                test_pitchf.to(device),
+                test_ds.to(device),
+            ),
+            ExportedPath,
+            dynamic_axes={
+                "phone": [1],
+                "pitch": [1],
+                "pitchf": [1],
+            },
+            do_constant_folding=False,
+            opset_version=16,
+            verbose=False,
+            input_names=input_names,
+            output_names=output_names,
+        )
\ No newline at end of file
diff --git a/export_onnx_old.py b/export_onnx_old.py
new file mode 100644
index 0000000..bff6d06
--- /dev/null
+++ b/export_onnx_old.py
@@ -0,0 +1,47 @@
+from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
+import torch
+
+person = "Shiroha/shiroha.pth"
+exported_path = "model.onnx"
+
+
+cpt = torch.load(person, map_location="cpu")
+cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+print(*cpt["config"])
+net_g = SynthesizerTrnMs256NSFsidM(*cpt["config"], is_half=False)
+net_g.load_state_dict(cpt["weight"], strict=False)
+
+test_phone = torch.rand(1, 200, 256)
+test_phone_lengths = torch.tensor([200]).long()
+test_pitch = torch.randint(size=(1, 200), low=5, high=255)
+test_pitchf = torch.rand(1, 200)
+test_ds = torch.LongTensor([0])
+test_rnd = torch.rand(1, 192, 200)
+input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
+output_names = [
+    "audio",
+]
+device = "cpu"
+torch.onnx.export(
+    net_g,
+    (
+        test_phone.to(device),
+        test_phone_lengths.to(device),
+        test_pitch.to(device),
+        test_pitchf.to(device),
+        test_ds.to(device),
+        test_rnd.to(device),
+    ),
+    exported_path,
+    dynamic_axes={
+        "phone": [1],
+        "pitch": [1],
+        "pitchf": [1],
+        "rnd": [2],
+    },
+    do_constant_folding=False,
+    opset_version=16,
+    verbose=False,
+    input_names=input_names,
+    output_names=output_names,
+)