# PaimonVoice/hparams.py
#
# 89 lines
# 2.5 KiB
# Python
# Raw Permalink Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import tensorflow as tf
from text import symbols
class hparams:
    """Flat namespace of experiment, data, audio, model and optimizer settings.

    Every setting is a class-level attribute, so values can be read directly
    off the class (for example ``hparams.sampling_rate``) without creating an
    instance. ``n_symbols`` is computed from the imported ``symbols`` table
    when the class body is executed.
    """

    def __init__(self) -> None:
        super().__init__()

    # ------------------------------------------------------------------
    # Experiment parameters
    # ------------------------------------------------------------------
    epochs = 3  # original inline note: 500
    iters_per_checkpoint = 1000
    seed = 1234
    dynamic_loss_scaling = True
    fp16_run = False
    distributed_run = False
    dist_backend = "nccl"
    dist_url = "tcp://localhost:54321"
    cudnn_enabled = True
    cudnn_benchmark = True
    ignore_layers = ['embedding.weight']

    # ------------------------------------------------------------------
    # Data parameters
    # ------------------------------------------------------------------
    # Author's note (translated): in practice this flag selects whether the
    # wav is read with numpy or with scipy.
    load_mel_from_disk = False
    training_files = 'filelists/zh_audio_text_train_filelist.txt'
    validation_files = 'filelists/zh_audio_text_val_filelist.txt'
    text_cleaners = ['english_cleaners']

    # ------------------------------------------------------------------
    # Audio parameters
    # ------------------------------------------------------------------
    max_wav_value = 32768.0
    sampling_rate = 22050  # original inline note: 22050
    filter_length = 1024
    hop_length = 256
    win_length = 1024
    n_mel_channels = 80
    mel_fmin = 0.0
    mel_fmax = 8000.0

    # ------------------------------------------------------------------
    # Model parameters
    # ------------------------------------------------------------------
    n_symbols = len(symbols)  # size of the imported symbol table
    symbols_embedding_dim = 512

    # Encoder
    encoder_kernel_size = 5
    encoder_n_convolutions = 3
    encoder_embedding_dim = 512

    # Decoder
    n_frames_per_step = 1  # currently only 1 is supported
    decoder_rnn_dim = 1024
    prenet_dim = 256
    max_decoder_steps = 1000
    gate_threshold = 0.5
    p_attention_dropout = 0.1
    p_decoder_dropout = 0.1

    # Attention
    attention_rnn_dim = 1024
    attention_dim = 128

    # Location layer
    attention_location_n_filters = 32
    attention_location_kernel_size = 31

    # Mel post-processing network
    postnet_embedding_dim = 512
    postnet_kernel_size = 5
    postnet_n_convolutions = 5

    # ------------------------------------------------------------------
    # Optimization hyperparameters
    # ------------------------------------------------------------------
    use_saved_learning_rate = False
    learning_rate = 1e-3
    weight_decay = 1e-6
    grad_clip_thresh = 1.0
    batch_size = 2  # original inline note: 64
    mask_padding = True  # set model's padded outputs to padded values
def create_hparams(hparams_string=None, verbose=False):
    """Return the hyperparameter namespace used by the rest of the project.

    Args:
        hparams_string: Accepted for call-site compatibility; this
            implementation does not read it, so no overrides are applied.
        verbose: Accepted for call-site compatibility; this implementation
            does not read it, so nothing is logged.

    Returns:
        The ``hparams`` class object itself (not an instance); settings are
        read from it as class attributes.
    """
    config = hparams
    return config