% Conference paper in the Findings of ACL: EMNLP 2021 series (record cleaned from an automated export).
% NOTE(review): `address` was exported as a country name rather than a publisher city; it is kept
% (translated to English) but should be verified against the publisher's own record.
@inproceedings{7914f6392cf94a43a12d88799ec39f6a,
  author    = {Liu, Xuebo and Wang, Longyue and Wong, Derek F. and Ding, Liang and Chao, Lidia S. and Shi, Shuming and Tu, Zhaopeng},
  title     = {On the Complementarity between Pre-Training and Back-Translation for Neural Machine Translation},
  booktitle = {Findings of the Association for Computational Linguistics, Findings of ACL},
  editor    = {Moens, Marie-Francine and Huang, Xuanjing and Specia, Lucia and Yih, Scott Wen-Tau},
  series    = {Findings of the Association for Computational Linguistics, Findings of ACL: EMNLP 2021},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {Australia},
  pages     = {2900--2907},
  year      = {2021},
  doi       = {10.18653/v1/2021.findings-emnlp.247},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2021 Association for Computational Linguistics.; 2021 Findings of the Association for Computational Linguistics, Findings of ACL: EMNLP 2021 ; Conference date: 07-11-2021 Through 11-11-2021},
  abstract  = {Pre-training (PT) and back-translation (BT) are two simple and powerful methods to utilize monolingual data for improving the model performance of neural machine translation (NMT). This paper takes the first step to investigate the complementarity between PT and BT. We introduce two probing tasks for PT and BT respectively and find that PT mainly contributes to the encoder module while BT brings more benefits to the decoder. Experimental results show that PT and BT are nicely complementary to each other, establishing state-of-the-art performances on the WMT16 English-Romanian and English-Russian benchmarks. Through extensive analyses on sentence originality and word frequency, we also demonstrate that combining Tagged BT with PT is more helpful to their complementarity, leading to better translation quality. Source code is freely available at https://github.com/SunbowLiu/PTvsBT.},
}