Fix signal clipping problem during resampling

Resampling can sometimes cause signal clipping; this happens with resamplers in general, not just librosa.resample.
This commit is contained in:
autumnmotor 2023-04-19 10:37:18 +09:00
parent 30d5f02a3d
commit fbd0e615dc
2 changed files with 15 additions and 2 deletions

View File

@ -98,7 +98,9 @@ class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
sampling_rate, self.sampling_rate sampling_rate, self.sampling_rate
) )
) )
audio_norm = audio / self.max_wav_value # audio_norm = audio / self.max_wav_value
# audio_norm = audio / np.abs(audio).max()
audio_norm = audio_norm.unsqueeze(0) audio_norm = audio_norm.unsqueeze(0)
spec_filename = filename.replace(".wav", ".spec.pt") spec_filename = filename.replace(".wav", ".spec.pt")
if os.path.exists(spec_filename): if os.path.exists(spec_filename):
@ -287,7 +289,9 @@ class TextAudioLoader(torch.utils.data.Dataset):
sampling_rate, self.sampling_rate sampling_rate, self.sampling_rate
) )
) )
audio_norm = audio / self.max_wav_value # audio_norm = audio / self.max_wav_value
# audio_norm = audio / np.abs(audio).max()
audio_norm = audio_norm.unsqueeze(0) audio_norm = audio_norm.unsqueeze(0)
spec_filename = filename.replace(".wav", ".spec.pt") spec_filename = filename.replace(".wav", ".spec.pt")
if os.path.exists(spec_filename): if os.path.exists(spec_filename):

View File

@ -65,6 +65,15 @@ class PreProcess:
# default resample type of librosa.resample is "soxr_hq". # default resample type of librosa.resample is "soxr_hq".
# Quality: soxr_vhq > soxr_hq # Quality: soxr_vhq > soxr_hq
tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000, res_type="soxr_vhq") tmp_audio = librosa.resample(tmp_audio, orig_sr=self.sr, target_sr=16000, res_type="soxr_vhq")
tmp_audio = (tmp_audio / np.abs(tmp_audio).max() * (self.max * self.alpha)) + (
1 - self.alpha
) * tmp_audio
wavfile.write(
"%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1),
self.sr,
(tmp_audio * 1).astype(np.float32),
)
wavfile.write( wavfile.write(
"%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1), "%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
16000, 16000,