ADD

# Add audio operation module: # Add audio reading function # Add amplitude spectrogram / dB-scaled spectrogram conversion functions # Preliminary implementation of the pipeline from audio loading to MFCC, mel, and magnitude spectrograms
2021-04-11 00:03:32 +08:00 · 2021-04-11 00:03:32 +08:00 · cd2b668870
parent aaf51ceac8
commit cd2b668870
1 changed files with 68 additions and 0 deletions
--- a/audio_operation.py
+++ b/audio_operation.py
@ -0,0 +1,68 @@
+import logging
+
+import librosa
+import numpy as np
+import scipy
+import scipy.fftpack
+
+import hparams
+
+
+def read_wav(path, sr, duration=None, mono=True):
+    """Load an audio file and return only its samples.
+
+    All arguments are forwarded unchanged to ``librosa.load``.
+
+    Args:
+        path: Location of the audio file.
+        sr: Sample rate passed to ``librosa.load``.
+        duration: Optional ``duration`` argument for ``librosa.load``;
+            ``None`` is passed through as-is.
+        mono: ``mono`` flag passed to ``librosa.load``.
+
+    Returns:
+        The first element of the ``(samples, sample_rate)`` pair returned
+        by ``librosa.load``; the returned sample rate is discarded.
+    """
+    samples, _ = librosa.load(path=path, sr=sr, mono=mono, duration=duration)
+    return samples
+
+
+def amp2db(amp):
+    """Convert amplitude values to decibels.
+
+    Thin wrapper around ``librosa.amplitude_to_db`` that relies on the
+    library's default settings (no extra arguments are passed).
+    """
+    decibels = librosa.amplitude_to_db(amp)
+    return decibels
+
+
+def db2amp(db):
+    """Convert decibel values back to amplitudes.
+
+    Thin wrapper around ``librosa.db_to_amplitude`` that relies on the
+    library's default settings (no extra arguments are passed).
+    """
+    amplitude = librosa.db_to_amplitude(db)
+    return amplitude
+
+
+def _get_mfcc_and_spec(wav, sr, n_fft, hop_length, win_length, n_mels, n_mfcc):
+    """Compute MFCCs, a dB magnitude spectrogram and a dB mel spectrogram.
+
+    Intermediate array shapes are reported through the ``logging`` module at
+    DEBUG level instead of being printed, so normal calls stay silent.
+
+    Args:
+        wav: Audio samples handed to ``librosa.stft``
+            (presumably a 1-D mono signal -- TODO confirm against callers).
+        sr: Sample rate used to build the mel filter bank.
+        n_fft: FFT size for the STFT and the mel filter bank.
+        hop_length: Hop length for the STFT.
+        win_length: Window length for the STFT.
+        n_mels: Number of mel bands.
+        n_mfcc: Number of leading DCT coefficients kept as MFCCs.
+
+    Returns:
+        A tuple ``(mfccs, mag_db, mel_db)`` with time on the first axis:
+        ``(t, n_mfcc)``, ``(t, 1 + n_fft // 2)`` and ``(t, n_mels)``.
+    """
+    log = logging.getLogger(__name__)
+    log.debug("wav shape: %s", wav.shape)
+
+    # Magnitude spectrogram, shape (1 + n_fft // 2, t).
+    spec = librosa.stft(y=wav, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
+    mag = np.abs(spec)
+    log.debug("magnitude spectrogram shape: %s", mag.shape)
+
+    # Mel filter bank, shape (n_mels, 1 + n_fft // 2); projecting the
+    # magnitudes through it yields the mel spectrogram, shape (n_mels, t).
+    mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
+    mel = np.dot(mel_basis, mag)
+    log.debug("mel spectrogram shape: %s", mel.shape)
+
+    # Amplitude -> dB for both spectrograms.
+    mag_db = amp2db(mag)
+    mel_db = amp2db(mel)
+    log.debug("dB magnitude spectrogram shape: %s", mag_db.shape)
+    log.debug("dB mel spectrogram shape: %s", mel_db.shape)
+
+    # MFCCs: orthonormal type-2 DCT along the mel axis, keeping the first
+    # n_mfcc coefficients.
+    mfccs = scipy.fftpack.dct(mel_db, axis=0, type=2, norm='ortho')[:n_mfcc]
+    log.debug("mfcc shape: %s", mfccs.shape)
+
+    # Transpose so that time is the leading axis:
+    # (t, n_mfcc), (t, 1 + n_fft // 2), (t, n_mels)
+    return mfccs.T, mag_db.T, mel_db.T
+
+
+def get_mfccs_and_phones(wav_file, trim=False, random_crop=True):
+    """Load a wav file and return its MFCC features using the TIMIT hparams.
+
+    Phone labels are not produced yet (see the TODO below), so only the
+    MFCC matrix is returned.
+
+    Args:
+        wav_file: Path of the audio file to load.
+        trim: Currently unused.
+        random_crop: Currently unused.
+
+    Returns:
+        The first element returned by ``_get_mfcc_and_spec`` (the MFCCs).
+    """
+    sample_rate = hparams.timit_sr
+    # NOTE(review): ``timit_wim_length`` looks like a typo for
+    # ``timit_win_length`` -- confirm against the hparams module.
+    feature_args = (
+        hparams.timit_n_fft,
+        hparams.timit_hop_length,
+        hparams.timit_wim_length,
+        hparams.timit_n_mels,
+        hparams.timit_n_mfcc,
+    )
+
+    # Load wav
+    signal = read_wav(wav_file, sample_rate)
+
+    # Only the MFCCs are needed here; both spectrograms are discarded.
+    mfcc, _, _ = _get_mfcc_and_spec(signal, sample_rate, *feature_args)
+
+    # TODO : get phones and return
+
+    return mfcc