# Patch summary (text before the first "diff --git" header is ignored by patch tools).
#
# Touches the `infer` method of four classes in lib/infer_pack/models.py:
#   SynthesizerTrnMs256NSFsid, SynthesizerTrnMs768NSFsid,
#   SynthesizerTrnMs256NSFsid_nono, SynthesizerTrnMs768NSFsid_nono.
#
# What changes in each `infer`:
#   * The trailing parameter `max_len=None` is replaced by `rate=None`.
#   * The decoder input is no longer sliced with `[:, :, :max_len]`;
#     `z * x_mask` is passed to `self.dec` unchanged.
#   * When `rate` is truthy, `head = int(z_p.shape[2] * rate)` is computed and
#     `z_p` / `x_mask` are cut down to their last `head` entries along dim 2
#     before the reverse flow runs. The two pitch-aware variants also cut
#     `nsff0` to its last `head` entries along dim 1.
#
# Review notes:
#   * If `int(z_p.shape[2] * rate)` evaluates to 0, the slice `-0:` keeps the
#     whole axis rather than producing an empty tensor.
#   * The sixth positional argument used to mean `max_len` and now means
#     `rate`; callers passing `max_len=` by keyword will raise TypeError.
#
diff --git a/lib/infer_pack/models.py b/lib/infer_pack/models.py
index 698fc8f..3a9cd77 100644
--- a/lib/infer_pack/models.py
+++ b/lib/infer_pack/models.py
@@ -631,12 +631,17 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
         o = self.dec(z_slice, pitchf, g=g)
         return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
 
-    def infer(self, phone, phone_lengths, pitch, nsff0, sid, max_len=None):
+    def infer(self, phone, phone_lengths, pitch, nsff0, sid, rate=None):
         g = self.emb_g(sid).unsqueeze(-1)
         m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
         z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
+        if(rate):
+            head=int(z_p.shape[2]*rate)
+            z_p=z_p[:,:,-head:]
+            x_mask=x_mask[:,:,-head:]
+            nsff0=nsff0[:,-head:]
         z = self.flow(z_p, x_mask, g=g, reverse=True)
-        o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g)
+        o = self.dec(z * x_mask, nsff0, g=g)
         return o, x_mask, (z, z_p, m_p, logs_p)
 
 
@@ -742,12 +747,17 @@ class SynthesizerTrnMs768NSFsid(nn.Module):
         o = self.dec(z_slice, pitchf, g=g)
         return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
 
-    def infer(self, phone, phone_lengths, pitch, nsff0, sid, max_len=None):
+    def infer(self, phone, phone_lengths, pitch, nsff0, sid, rate=None):
         g = self.emb_g(sid).unsqueeze(-1)
         m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
         z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
+        if(rate):
+            head=int(z_p.shape[2]*rate)
+            z_p=z_p[:,:,-head:]
+            x_mask=x_mask[:,:,-head:]
+            nsff0=nsff0[:,-head:]
         z = self.flow(z_p, x_mask, g=g, reverse=True)
-        o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g)
+        o = self.dec(z * x_mask, nsff0, g=g)
         return o, x_mask, (z, z_p, m_p, logs_p)
 
 
@@ -844,12 +854,16 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
         o = self.dec(z_slice, g=g)
         return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
 
-    def infer(self, phone, phone_lengths, sid, max_len=None):
+    def infer(self, phone, phone_lengths, sid, rate=None):
         g = self.emb_g(sid).unsqueeze(-1)
         m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
         z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
+        if(rate):
+            head=int(z_p.shape[2]*rate)
+            z_p=z_p[:,:,-head:]
+            x_mask=x_mask[:,:,-head:]
         z = self.flow(z_p, x_mask, g=g, reverse=True)
-        o = self.dec((z * x_mask)[:, :, :max_len], g=g)
+        o = self.dec(z * x_mask, g=g)
         return o, x_mask, (z, z_p, m_p, logs_p)
 
 
@@ -946,12 +960,16 @@ class SynthesizerTrnMs768NSFsid_nono(nn.Module):
         o = self.dec(z_slice, g=g)
         return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
 
-    def infer(self, phone, phone_lengths, sid, max_len=None):
+    def infer(self, phone, phone_lengths, sid, rate=None):
         g = self.emb_g(sid).unsqueeze(-1)
         m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
         z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
+        if(rate):
+            head=int(z_p.shape[2]*rate)
+            z_p=z_p[:,:,-head:]
+            x_mask=x_mask[:,:,-head:]
         z = self.flow(z_p, x_mask, g=g, reverse=True)
-        o = self.dec((z * x_mask)[:, :, :max_len], g=g)
+        o = self.dec(z * x_mask, g=g)
         return o, x_mask, (z, z_p, m_p, logs_p)
 
 