[maintenance] cleanup one click training and related (#219)

- remove unused imports
- remove unused gpus6 param from train1key fn
- improve readability and reusability of the various path strings (see the sketch below)
This commit is contained in: main
Sebastian Gabriel Savu 2023-05-05 16:48:39 +01:00 committed by GitHub
parent 4027928a8e
commit 4abd0bd680
2 changed files with 33 additions and 35 deletions
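
The core of the cleanup is building the experiment log paths once and reusing them, instead of re-formatting "%s/logs/%s" at every call site, plus dropping the unused gpus6 argument from train1key and its Gradio caller. The snippet below is a minimal standalone sketch of that pattern, not the actual train1key code: the helper name build_experiment_paths and the example experiment name are hypothetical, while the variable names mirror the ones introduced in the diff.

import os

# Minimal sketch, assuming now_dir is the repo root and exp_dir1 is the
# experiment name; build_experiment_paths is a hypothetical helper, not part
# of the actual codebase.
def build_experiment_paths(now_dir, exp_dir1):
    model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    return {
        "model_log_dir": model_log_dir,
        "preprocess_log_path": "%s/preprocess.log" % model_log_dir,
        "extract_f0_feature_log_path": "%s/extract_f0_feature.log" % model_log_dir,
        "gt_wavs_dir": "%s/0_gt_wavs" % model_log_dir,
        "feature256_dir": "%s/3_feature256" % model_log_dir,
    }

paths = build_experiment_paths(os.getcwd(), "my-voice")
os.makedirs(paths["model_log_dir"], exist_ok=True)
open(paths["preprocess_log_path"], "w").close()  # reset the log, as train1key does

Because each subprocess command and log read now takes a single pre-built path variable, the gpus6 removal is a pure signature cleanup: the only other change needed is dropping it from the inputs list in the Gradio click handler, as the last hunk shows.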

View File

@@ -1,6 +1,4 @@
 import argparse
-import glob
-import sys
 import torch
 from multiprocessing import cpu_count

View File

@@ -1,5 +1,5 @@
 from multiprocessing import cpu_count
-import threading, pdb, librosa
+import threading
 from time import sleep
 from subprocess import Popen
 from time import sleep
@@ -714,7 +714,6 @@ def train1key(
     if_f0_3,
     trainset_dir4,
     spk_id5,
-    gpus6,
     np7,
     f0method8,
     save_epoch10,
@@ -732,35 +731,40 @@ def train1key(
         infos.append(strr)
         return "\n".join(infos)
-    os.makedirs("%s/logs/%s" % (now_dir, exp_dir1), exist_ok=True)
+    model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+    preprocess_log_path = "%s/preprocess.log" % model_log_dir
+    extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
+    gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
+    feature256_dir = "%s/3_feature256" % model_log_dir
+    os.makedirs(model_log_dir, exist_ok=True)
     #########step1:处理数据
-    open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir1), "w").close()
+    open(preprocess_log_path, "w").close()
     cmd = (
         config.python_cmd
-        + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "
-        % (trainset_dir4, sr_dict[sr2], ncpu, now_dir, exp_dir1)
+        + " trainset_preprocess_pipeline_print.py %s %s %s %s "
+        % (trainset_dir4, sr_dict[sr2], ncpu, model_log_dir)
         + str(config.noparallel)
     )
     yield get_info_str(i18n("step1:正在处理数据"))
     yield get_info_str(cmd)
     p = Popen(cmd, shell=True)
     p.wait()
-    with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir1), "r") as f:
+    with open(preprocess_log_path, "r") as f:
         print(f.read())
     #########step2a:提取音高
-    open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir1), "w")
+    open(extract_f0_feature_log_path, "w")
     if if_f0_3 == i18n("是"):
         yield get_info_str("step2a:正在提取音高")
-        cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s" % (
-            now_dir,
-            exp_dir1,
+        cmd = config.python_cmd + " extract_f0_print.py %s %s %s" % (
+            model_log_dir,
             np7,
             f0method8,
         )
         yield get_info_str(cmd)
         p = Popen(cmd, shell=True, cwd=now_dir)
         p.wait()
-        with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir1), "r") as f:
+        with open(extract_f0_feature_log_path, "r") as f:
             print(f.read())
     else:
         yield get_info_str(i18n("step2a:无需提取音高"))
@@ -770,13 +774,12 @@ def train1key(
     leng = len(gpus)
     ps = []
     for idx, n_g in enumerate(gpus):
-        cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s/logs/%s" % (
+        cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s" % (
             config.device,
             leng,
             idx,
             n_g,
-            now_dir,
-            exp_dir1,
+            model_log_dir,
         )
         yield get_info_str(cmd)
         p = Popen(
@@ -785,26 +788,23 @@ def train1key(
         ps.append(p)
     for p in ps:
         p.wait()
-    with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir1), "r") as f:
+    with open(extract_f0_feature_log_path, "r") as f:
         print(f.read())
     #######step3a:训练模型
     yield get_info_str(i18n("step3a:正在训练模型"))
     # 生成filelist
-    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
-    gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
-    co256_dir = "%s/3_feature256" % (exp_dir)
     if if_f0_3 == i18n("是"):
-        f0_dir = "%s/2a_f0" % (exp_dir)
-        f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
+        f0_dir = "%s/2a_f0" % model_log_dir
+        f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
         names = (
             set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
-            & set([name.split(".")[0] for name in os.listdir(co256_dir)])
+            & set([name.split(".")[0] for name in os.listdir(feature256_dir)])
             & set([name.split(".")[0] for name in os.listdir(f0_dir)])
             & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
         )
     else:
         names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
-            [name.split(".")[0] for name in os.listdir(co256_dir)]
+            [name.split(".")[0] for name in os.listdir(feature256_dir)]
         )
     opt = []
     for name in names:
@@ -814,7 +814,7 @@ def train1key(
                 % (
                     gt_wavs_dir.replace("\\", "\\\\"),
                     name,
-                    co256_dir.replace("\\", "\\\\"),
+                    feature256_dir.replace("\\", "\\\\"),
                     name,
                     f0_dir.replace("\\", "\\\\"),
                     name,
@@ -829,7 +829,7 @@ def train1key(
                 % (
                     gt_wavs_dir.replace("\\", "\\\\"),
                     name,
-                    co256_dir.replace("\\", "\\\\"),
+                    feature256_dir.replace("\\", "\\\\"),
                     name,
                     spk_id5,
                 )
@@ -847,7 +847,7 @@ def train1key(
                 % (now_dir, sr2, now_dir, spk_id5)
             )
     shuffle(opt)
-    with open("%s/filelist.txt" % exp_dir, "w") as f:
+    with open("%s/filelist.txt" % model_log_dir, "w") as f:
         f.write("\n".join(opt))
     yield get_info_str("write filelist done")
     if gpus16:
@@ -890,17 +890,18 @@ def train1key(
     p.wait()
     yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
     #######step3b:训练索引
-    feature_dir = "%s/3_feature256" % (exp_dir)
     npys = []
-    listdir_res = list(os.listdir(feature_dir))
+    listdir_res = list(os.listdir(feature256_dir))
     for name in sorted(listdir_res):
-        phone = np.load("%s/%s" % (feature_dir, name))
+        phone = np.load("%s/%s" % (feature256_dir, name))
         npys.append(phone)
     big_npy = np.concatenate(npys, 0)
     big_npy_idx = np.arange(big_npy.shape[0])
     np.random.shuffle(big_npy_idx)
     big_npy = big_npy[big_npy_idx]
-    np.save("%s/total_fea.npy" % exp_dir, big_npy)
+    np.save("%s/total_fea.npy" % model_log_dir, big_npy)
     # n_ivf = big_npy.shape[0] // 39
     n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
     yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
@@ -912,7 +913,7 @@ def train1key(
     index.train(big_npy)
     faiss.write_index(
         index,
-        "%s/trained_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe),
+        "%s/trained_IVF%s_Flat_nprobe_%s.index" % (model_log_dir, n_ivf, index_ivf.nprobe),
     )
     yield get_info_str("adding index")
     batch_size_add = 8192
@@ -920,7 +921,7 @@ def train1key(
         index.add(big_npy[i : i + batch_size_add])
     faiss.write_index(
         index,
-        "%s/added_IVF%s_Flat_nprobe_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe),
+        "%s/added_IVF%s_Flat_nprobe_%s.index" % (model_log_dir, n_ivf, index_ivf.nprobe),
     )
     yield get_info_str(
         "成功构建索引, added_IVF%s_Flat_nprobe_%s.index" % (n_ivf, index_ivf.nprobe)
@@ -1392,7 +1393,6 @@ with gr.Blocks() as app:
                     if_f0_3,
                     trainset_dir4,
                     spk_id5,
-                    gpus6,
                     np7,
                     f0method8,
                     save_epoch10,