% Conference paper from the ICASSP 2016 proceedings (see booktitle/series).
% Citation key is the exporter-generated identifier; kept unchanged so that
% existing \cite commands continue to resolve.
% NOTE(review): `address` carries only the publisher's country as exported;
% the publisher's city is not available in this record -- TODO confirm.
% NOTE(review): `day` is not a standard BibTeX field and is presumably
% ignored by standard styles; retained as exported.
@inproceedings{fa74bacd80444661a66a76cc68b14030,
  author    = {Xie, Feng Long and Soong, Frank K. and Li, Haifeng},
  title     = {A {KL} divergence and {DNN} approach to cross-lingual {TTS}},
  booktitle = {2016 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2016 - Proceedings},
  series    = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2016},
  month     = may,
  day       = {18},
  pages     = {5515--5519},
  doi       = {10.1109/ICASSP.2016.7472732},
  language  = {english},
  keywords  = {Kullback-Leibler divergence, cross-lingual, deep neural networks, speech synthesis},
  abstract  = {We propose a Kullback-Leibler divergence (KLD) and deep neural net (DNN) based approach to cross-lingual TTS (CL-TTS) training. A speaker independent DNN (SI-DNN) ASR is used to equalize the speaker difference between a source speaker in L1 and a reference speaker in L2. Two speaker dependent GMM-HMM parametric TTS systems are first trained in the respective languages. The senones sets of the two TTS are matched in the SI-DNN ASR in terms of their output posteriors distributions in KLD. The minimum KLD criterion is used to transform the senones in the source speaker's TTS (L1) to the corresponding ``closest'' senones in the target language (L2). The new CL-TTS thus trained has been shown to achieve high speaker similarity to the source speaker in L1 while high intelligibility and naturalness are preserved. For untranscribed source speaker's recordings, say, conversational speech, a frame mapping, instead of ``senone mapping'' is also proposed to achieve a high but slightly inferior CL-TTS.},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 41st IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2016 ; Conference date: 20-03-2016 Through 25-03-2016},
}