update
# Updated model.Net1:
#   Implement the Net1 module using model.modules
#   Test Net1 functionality
# Updated model.modules:
#   Optimize the details of model.modules
# Updated hparams:
#   Add train_logits_temperature, phns_len and other settings
This commit is contained in:
parent 250b0dc1fd
commit bfdeff5f90
@@ -1,3 +1,7 @@
+from audio_operation import phns
+
+phns_len = len(phns)
+
 # TIMIT
 timit_sr = 16000
 timit_n_mfcc = 40
@@ -12,3 +16,5 @@ timit_default_duration = 2
 net1_train_dataset = "./data/dataset/TIMIT/TRAIN/*/*/*.wav"
 net1_train_batch_size = 32
 net1_train_num_workers = 4
+
+net1_train_logits_t = 1.0  # temperature
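The new net1_train_logits_t value is consumed further down in this commit as a softmax temperature (the logits are divided by it before Softmax). As a minimal standalone sketch of what that temperature does, with made-up logit values:

import torch

logits = torch.tensor([2.0, 1.0, 0.5])

# t = 1.0 reproduces the plain softmax; t > 1 flattens the distribution,
# t < 1 sharpens it toward the arg-max phoneme class.
for t in (0.5, 1.0, 2.0):
    print(t, torch.softmax(logits / t, dim=-1))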
@@ -1,13 +1,68 @@
-from torch.nn import Module
+import torch
+from torch.nn import Module, Linear, Softmax
+from .modules import PreNet, CBHG
+
+import hparams
 
 
 class Net1(Module):
-    def __init__(self):
+    def __init__(self, in_dims, train1_hidden_units, dropout_rate, num_conv1d_banks, num_highway_blocks):
         super().__init__()
 
-        # TODO : init the net1 model
+        # in_dims = n_mfcc, out_dims_1 = 2*out_dims_2 = train1_hidden_units
+        self.pre_net = PreNet(in_dims=in_dims,
+                              out_dims_1=train1_hidden_units,
+                              dropout_rate=dropout_rate)
 
-    def forward(self):
-        # TODO : implement the net1 structure
+        # num_conv1d_banks = train1_num_conv1d_banks, num_highway_blocks = train1_num_highway_blocks
+        # in_dims = train1_hidden_units // 2, out_dims = train1_hidden_units // 2
+        # activation = torch.nn.ReLU()
+        self.cbhg = CBHG(num_conv1d_banks=num_conv1d_banks,
+                         num_highway_blocks=num_highway_blocks,
+                         in_dims=train1_hidden_units // 2,
+                         out_dims=train1_hidden_units // 2,
+                         activation=torch.nn.ReLU())
 
-        pass
+        # in_features = train1_hidden_units, out_features = phns_len
+        self.logits = Linear(in_features=train1_hidden_units, out_features=hparams.phns_len)
+        self.softmax = Softmax(dim=-1)
+
+    def forward(self, inputs):
+        # inputs : (N, L_in, in_dims)
+        # in_dims = n_mfcc
+
+        # PreNet
+        pre_net_outputs = self.pre_net(inputs)
+        # pre_net_outputs : (N, L_in, train1_hidden_units // 2)
+
+        # change data format to (N, C, L_in) for the Conv1d banks
+        cbhg_inputs = pre_net_outputs.transpose(2, 1)
+        # cbhg_inputs : (N, train1_hidden_units // 2, L_in)
+
+        # CBHG
+        cbhg_outputs = self.cbhg(cbhg_inputs)
+        # cbhg_outputs : (N, L_in, train1_hidden_units)
+
+        # Final linear projection
+        logits_outputs = self.logits(cbhg_outputs)
+        # logits_outputs : (N, L_in, phns_len)
+
+        ppgs = self.softmax(logits_outputs / hparams.net1_train_logits_t)
+        # ppgs : (N, L_in, phns_len)
+
+        preds = torch.argmax(logits_outputs, dim=-1).int()
+        # preds : (N, L_in)
+
+        debug = True
+        if debug:
+            print("pre_net_outputs : " + str(pre_net_outputs.shape))
+            print("cbhg_inputs : " + str(cbhg_inputs.shape))
+            print("cbhg_outputs : " + str(cbhg_outputs.shape))
+            print("logits_outputs : " + str(logits_outputs.shape))
+            print("ppgs : " + str(ppgs.shape))
+            print("preds : " + str(preds.shape) + " , preds.dtype : " + str(preds.dtype))
+
+        # ppgs : (N, L_in, phns_len)
+        # preds : (N, L_in)
+        # logits_outputs : (N, L_in, phns_len)
+        return ppgs, preds, logits_outputs
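The commit message mentions testing Net1, but no test file appears in this diff, so here is a minimal hypothetical smoke test of the module above. The import path and all constructor values are assumptions for illustration only; in the real code they would come from hparams:

import torch
from model.net1 import Net1  # assumed module path

# hyperparameter values below are illustrative assumptions, not values read from hparams
net1 = Net1(in_dims=40,               # matches timit_n_mfcc
            train1_hidden_units=256,
            dropout_rate=0.2,
            num_conv1d_banks=8,
            num_highway_blocks=4)

mfccs = torch.randn(2, 100, 40)       # dummy (N, L_in, n_mfcc) batch
ppgs, preds, logits = net1(mfccs)
print(ppgs.shape, preds.shape, logits.shape)
# expected: (2, 100, phns_len), (2, 100), (2, 100, phns_len)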
@@ -4,9 +4,12 @@ from torch.nn import Linear, Conv1d, MaxPool1d, Dropout, BatchNorm1d, ReLU, Sigm
 
 
 class PreNet(Module):
-    def __init__(self, in_dims, out_dims_1, out_dims_2, dropout_rate):
+    def __init__(self, in_dims, out_dims_1, out_dims_2=None, dropout_rate=0):
         super(PreNet, self).__init__()
 
+        if out_dims_2 is None:
+            out_dims_2 = out_dims_1 // 2
+
         self.relu = ReLU()
         self.drop = Dropout(dropout_rate)
 
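With the new defaults, callers can pass only out_dims_1 and get the usual half-width second layer, which is what Net1 relies on above (its comment reads out_dims_1 = 2*out_dims_2 = train1_hidden_units). A quick illustration with arbitrary dimensions:

# out_dims_2 falls back to out_dims_1 // 2 when omitted
pre_net = PreNet(in_dims=40, out_dims_1=256, dropout_rate=0.2)
# equivalent to: PreNet(in_dims=40, out_dims_1=256, out_dims_2=128, dropout_rate=0.2)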
@@ -150,17 +153,17 @@ class Conv1dBanks(Module):
 
 
 class CBHG(Module):
-    def __init__(self, k, num_highway_blocks, in_dims, out_dims, activation):
+    def __init__(self, num_conv1d_banks, num_highway_blocks, in_dims, out_dims, activation):
         super(CBHG, self).__init__()
 
         self.num_highway_blocks = num_highway_blocks
 
-        self.conv1d_banks = Conv1dBanks(k, in_dims, out_dims, activation)
+        self.conv1d_banks = Conv1dBanks(num_conv1d_banks, in_dims, out_dims, activation)
 
         # kernel_size = 2 with stride 1 would shorten the sequence, so padding = 1 is added
         self.max_pool1d = MaxPool1d(kernel_size=2, stride=1, padding=1)
 
-        self.projection1 = Conv1dNorm(in_dims=k * out_dims, out_dims=out_dims,
+        self.projection1 = Conv1dNorm(in_dims=num_conv1d_banks * out_dims, out_dims=out_dims,
                                       kernel_size=3, activation_fn=activation)
         self.projection2 = Conv1dNorm(in_dims=out_dims, out_dims=out_dims,
                                       kernel_size=3, activation_fn=None)
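For reference on that pooling comment: with kernel_size=2, stride=1 and padding=1, MaxPool1d actually returns one extra frame (L_out = L_in + 1), so the surrounding CBHG code presumably trims or slices the result; that detail is not visible in this hunk. A standalone check with arbitrary sizes:

import torch
from torch.nn import MaxPool1d

x = torch.randn(1, 16, 100)  # (N, C, L_in)
y = MaxPool1d(kernel_size=2, stride=1, padding=1)(x)
print(y.shape)               # torch.Size([1, 16, 101]), i.e. L_out = L_in + 1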