% Conference paper in the ICONIP 2015 proceedings (Lecture Notes in Computer Science series).
% NOTE(review): `address` holds a country, not the publisher's city, and `publisher` does not
%   match the imprint named in the copyright note -- confirm both against the book's front matter.
% NOTE(review): no `volume` (series number) is recorded for this proceedings part -- add once verified.
@inproceedings{519852655fc64a7996ace3da0ac68c03,
  author    = {Zhang, Chunyue and Zhao, Tiejun},
  title     = {Bilingual lexicon extraction with forced correlation from comparable corpora},
  editor    = {Lai, Weng Kin and Liu, Qingshan and Huang, Tingwen and Arik, Sabri},
  booktitle = {Neural Information Processing - 22nd International Conference, {ICONIP} 2015, Proceedings},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {528--535},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2015},
  isbn      = {9783319265346},
  doi       = {10.1007/978-3-319-26535-3_60},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2015.; 22nd International Conference on Neural Information Processing, ICONIP 2015 ; Conference date: 09-11-2015 Through 12-11-2015},
  abstract  = {Recently a simple linear transformation with word embedding has been found to be highly effective to extract a bilingual lexicon from comparable corpora. However, the pairs of bilingual word embedding for training this transformation are assumed to satisfy a linear relationship automatically which actually can{\textquoteright}t be guaranteed absolutely in practice. This paper proposes a simple solution based on canonical correlation analysis (CCA) which forces the bilingual word embedding for training the transformation to be maximally linearly correlated onto the projection subspaces. After projecting the original word embedding into the new correlation subspace in two languages, a better transformation matrix is again learned with the new projected word embeddings as before. The experimental results confirm that the proposed solution can achieve a significant improvement of 62\% in the precision at Top-1 over the baseline approach on the English-to-Chinese bilingual lexicon extraction task.},
}