Conference paper in the ICASSP 2025 proceedings. The citation key is kept
as exported so that existing citations keep resolving. Editor names had
escaped braces that printed literally; language and address were given in
Chinese and are normalised to English here.

@inproceedings{de8b1bb268c14b58a26e357629f664bd,
  author    = {Wang, Zhen and Han, Jiqing and Zhang, Liwen and Zhang, Youcheng},
  title     = {{InfoMin}-based Query Embedding Optimization For Query-based Universal Sound Separation},
  booktitle = {2025 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2025 - Proceedings},
  editor    = {Rao, Bhaskar D. and Trancoso, Isabel and Sharma, Gaurav and Mehta, Neelesh B.},
  series    = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2025},
  doi       = {10.1109/ICASSP49660.2025.10887870},
  language  = {English},
  keywords  = {contrastive learning, query-based universal sound separation, source separation},
  abstract  = {The query-based universal sound separation (QUSS) has been addressed, aiming to perform the separation of specific sound sources based on a given query. Most of existed methods focus on the improvement of separation models, ignoring the influence of category-conditioned query embedding distribution on separation performance. To address this issue, we propose an optimization method for query embedding that reduces mutual information (MI) between query embeddings while keeping task-related information intact, named the InfoMin principle. In addition, we propose the Frequency-varying Feature-wise Linear Modulation (FFiLM), which leverages frequency band differences in acoustic events to enhance the modulation capability of query embedding and improve the performance of the separation model. Experimental results show that our method achieves considerable improvements over the existing SoTA method.},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE. 2025 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2025; conference date: 2025-04-06 through 2025-04-11},
}