Open3

『Pythonで学ぶ音声合成』読みながらのメモ

nabeyang

図5-6を描く

# Figure 5-6: overlay the raw F0 contour with its continuous (interpolated)
# counterpart for the example utterance.
sr, x = wavfile.read(ttslearn.util.example_audio_file())
# The samples are cast to float64 before being handed to pyworld.dio.
x = x.astype(np.float64)
f0, timeaxis = pyworld.dio(x, sr)

# Interpolation is done on log-F0 and mapped back with np.exp for plotting.
# NOTE(review): interp1d here is presumably nnmnkwii.preprocessing.interp1d,
# which fills the zero-valued (unvoiced) frames -- confirm against the imports.
lf0 = f0_to_lf0(f0)
clf0 = interp1d(lf0, kind="linear")

plt.plot(timeaxis, f0, label="F0")
# Legend label spelling fixed ("Continous" -> "Continuous").
plt.plot(timeaxis, np.exp(clf0), '--', label="Continuous F0")
# Zoom in on the 0.25-1.43 s span of the utterance.
plt.xlim((0.25, 1.43))
plt.xticks(np.arange(0.3, 1.5, 0.2))
plt.legend()
plt.show()

nabeyang

図5-7を描く

# Figure 5-7: waveform (top) and voiced/unvoiced flag (bottom) for the
# example utterance, sharing the same time window.
sr, x = wavfile.read(ttslearn.util.example_audio_file())
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 4))

# Newer librosa releases replace waveplot with waveshow and accept `sr` only
# as a keyword; prefer waveshow when it exists and fall back to waveplot so
# the snippet runs on either side of that change.
draw_wave = getattr(librosa.display, "waveshow", None)
if draw_wave is None:
    draw_wave = librosa.display.waveplot
draw_wave(x.astype(np.float32), sr=sr, ax=ax1)
ax1.set_title('Waveform')
ax1.set_xlabel("Time [sec]")
ax1.set_ylabel("Amplitude")

f0, timeaxis = pyworld.dio(x.astype(np.float64), sr)
# Frames with f0 > 0 are flagged 1.0, all others 0.0.
vuv = (f0 > 0).astype(np.float32)
ax2.plot(timeaxis, vuv)
# Title fixed: the flag is voiced/unvoiced, i.e. "V/UV" (was "U/UV").
ax2.set_title('V/UV')
ax2.set_xlabel("Time [sec]")
ax2.set_ylabel("Binary value")

# Apply identical ticks and limits to both axes so the panels line up.
xticks = np.arange(0.3, 1.5, 0.2)
xlim = (0.25, 1.43)
for ax in (ax1, ax2):
    ax.set_xticks(xticks)
    ax.set_xlim(xlim)

fig.tight_layout()
# plt.show() blocks when run as a script and also works with non-GUI
# (inline) backends, where Figure.show() only emits a warning.
plt.show()