% SlotFusion conference paper (ICASSP 2025 proceedings).
% Names are stored in "Last, First" form; language and address are ASCII English
% so the entry is safe for classic 8-bit BibTeX as well as Biber.
@inproceedings{d2e607002e694332934dd16e139e1114,
  author    = {Han, Fangzhou and Yu, Tianyi and Zhang, Lamei and Si, Lingyu and Zhang, Yiqi},
  title     = {{SlotFusion}: Object-Centric Audiovisual Feature Fusion with Slot Attention for Remote Sensing Scene Recognition},
  booktitle = {2025 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2025 - Proceedings},
  editor    = {Rao, Bhaskar D. and Trancoso, Isabel and Sharma, Gaurav and Mehta, Neelesh B.},
  series    = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2025},
  doi       = {10.1109/ICASSP49660.2025.10888715},
  language  = {English},
  keywords  = {Audiovisual feature fusion, Gated feature fusion, Object-centric learning, Slot attention},
  abstract  = {Despite significant advancements in remote sensing multimodal learning, particularly in image-image feature fusion, the exploration of audio-image feature fusion remains insufficient. Given the complexity and redundancy of ground objects in remote sensing images, accurately aligning audio features with image features during the fusion process is a critical challenge. In this paper, we introduce an object-centric feature fusion method named SlotFusion. By employing a slot attention-based feature decoupling module and a slot-based audiovisual feature fusion module, we transform modality features with complex semantic information into a set of slot features corresponding to object units and use gated activation units to adaptively implement object-centric feature fusion. Experiments on the Audio Visual Aerial Scene Recognition dataset (ADVANCE) demonstrate that the proposed SlotFusion significantly improves remote sensing scene recognition performance, with a 7.04\% increase in overall accuracy compared to previous methods, achieving state-of-the-art results.},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE.; 2025 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2025 ; Conference date: 06-04-2025 Through 11-04-2025},
}