@inproceedings{8eba76dbdb1a4d478fdc43837e48e265,
  title     = {Think Beyond Words: Exploring Context-Relevant Visual Commonsense for Diverse Dialogue Generation},
  author    = {Liu, Yiting and Li, Liang and Zhang, Beichen and Huang, Qingming},
  editor    = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2022},
  series    = {Findings of the Association for Computational Linguistics: EMNLP 2022},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {Abu Dhabi, United Arab Emirates},
  pages     = {3106--3117},
  year      = {2022},
  doi       = {10.18653/v1/2022.findings-emnlp.250},
  language  = {English},
  abstract  = {Commonsense knowledge has been widely considered for building intelligent open-domain dialogue agents, aiming to generate meaningful and diverse responses. Previous works in this field usually lack the ability to effectively obtain and utilize auxiliary commonsense from the external visual world. In this paper, we argue that exploiting logical information in images related to context can be effective to enrich and steer the generation process. In view of this, we propose VICTOR, a context-relevant VIsual Commonsense enhanced dialogue generaTOR for generating coherent and informative responses. To obtain the associated visual commonsense, we devise a novel approach that expands topic words on the knowledge graph and maps them into daily scenarios. During the generation, the model adopts multimodal fusion mechanism to integrate visual and textual information, and adaptively combine their decoding distributions for better response generation. The experimental results on two public datasets show that our proposed method outperforms the latest competitive methods in terms of coherence and diversity.},
  note      = {Publisher Copyright: {\textcopyright} 2022 Association for Computational Linguistics.; 2022 Findings of the Association for Computational Linguistics: EMNLP 2022 ; Conference date: 07-12-2022 Through 11-12-2022},
}