89 lines
2.5 KiB
Python
89 lines
2.5 KiB
Python
import tensorflow as tf
|
||
from text import symbols
|
||
|
||
class hparams:
    """Flat container of configuration values used as class-level attributes.

    The settings are grouped into experiment, data, audio, model and
    optimization sections. ``n_symbols`` is computed from ``len(symbols)``
    when the class body is executed; every other value is a literal.
    """

    def __init__(self) -> None:
        # Only delegates to the parent initializer; no instance attributes
        # are set here, so all settings below live on the class itself.
        super().__init__()

    # ------------------------------------------------------------------
    # Experiment parameters
    # ------------------------------------------------------------------
    epochs = 3  # original inline note: 500
    iters_per_checkpoint = 1000
    seed = 1234
    dynamic_loss_scaling = True
    fp16_run = False
    distributed_run = False
    dist_backend = "nccl"
    dist_url = "tcp://localhost:54321"
    cudnn_enabled = True
    cudnn_benchmark = True
    ignore_layers = ["embedding.weight"]

    # ------------------------------------------------------------------
    # Data parameters
    # ------------------------------------------------------------------
    # Translated from the original note: in practice this flag selects
    # whether the wav is read with numpy or with scipy.
    load_mel_from_disk = False
    training_files = "filelists/zh_audio_text_train_filelist.txt"
    validation_files = "filelists/zh_audio_text_val_filelist.txt"
    text_cleaners = ["english_cleaners"]

    # ------------------------------------------------------------------
    # Audio parameters
    # ------------------------------------------------------------------
    max_wav_value = 32768.0
    sampling_rate = 22050  # original inline note: 22050
    filter_length = 1024
    hop_length = 256
    win_length = 1024
    n_mel_channels = 80
    mel_fmin = 0.0
    mel_fmax = 8000.0

    # ------------------------------------------------------------------
    # Model parameters
    # ------------------------------------------------------------------
    n_symbols = len(symbols)  # size of the imported symbol table
    symbols_embedding_dim = 512

    # Encoder
    encoder_kernel_size = 5
    encoder_n_convolutions = 3
    encoder_embedding_dim = 512

    # Decoder
    n_frames_per_step = 1  # currently only 1 is supported
    decoder_rnn_dim = 1024
    prenet_dim = 256
    max_decoder_steps = 1000
    gate_threshold = 0.5
    p_attention_dropout = 0.1
    p_decoder_dropout = 0.1

    # Attention
    attention_rnn_dim = 1024
    attention_dim = 128

    # Location layer
    attention_location_n_filters = 32
    attention_location_kernel_size = 31

    # Mel post-processing network
    postnet_embedding_dim = 512
    postnet_kernel_size = 5
    postnet_n_convolutions = 5

    # ------------------------------------------------------------------
    # Optimization hyperparameters
    # ------------------------------------------------------------------
    use_saved_learning_rate = False
    learning_rate = 1e-3
    weight_decay = 1e-6
    grad_clip_thresh = 1.0
    batch_size = 2  # original inline note: 64
    mask_padding = True  # set model's padded outputs to padded values
|
||
|
||
|
||
def create_hparams(hparams_string=None, verbose=False):
    """Return the hyper-parameter container.

    Args:
        hparams_string: Accepted for call compatibility; not read here.
        verbose: Accepted for call compatibility; not read here.

    Returns:
        The ``hparams`` class object itself (not an instance), so any
        attribute assignment by a caller affects the shared class.
    """
    config = hparams
    return config
|