% Conference paper (ACL 2024 long-paper volume); record apparently machine-exported
% and cleaned up here. Changes relative to the export:
%   - author/editor names rewritten in the unambiguous "Last, First" form;
%   - literal \{ \} around one editor's given names removed (they printed as braces);
%   - localized (Chinese) `language` and `address` values translated to English;
%   - acronym in the title brace-protected against sentence-casing styles;
%   - `booktitle` expanded from the bare volume subtitle to the full proceedings title;
%   - `month` added from the conference dates recorded in `note`.
% NOTE(review): `address` is a verbatim translation of the exported value and has
% not been verified as the publisher location -- confirm against the ACL Anthology
% record for this DOI before relying on it.
@inproceedings{c6ee1023473847a1bb809551fb76d822,
  author    = {Zhao, Weixiang and Wang, Shilong and Hu, Yulin and Zhao, Yanyan and Qin, Bing and Zhang, Xuanyu and Yang, Qing and Xu, Dongliang and Che, Wanxiang},
  title     = {{SAPT}: A Shared Attention Framework for Parameter-Efficient Continual Learning of Large Language Models},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  editor    = {Ku, Lun-Wei and Martins, Andre F. T. and Srikumar, Vivek},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {Australia},
  year      = {2024},
  month     = aug,
  pages     = {11641--11661},
  doi       = {10.18653/v1/2024.acl-long.625},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2024 Association for Computational Linguistics.; 62nd Annual Meeting of the Association for Computational Linguistics, ACL 2024 ; Conference date: 11-08-2024 Through 16-08-2024},
  abstract  = {The continual learning (CL) ability is vital for deploying large language models (LLMs) in the dynamic world. Existing methods devise the learning module to acquire task-specific knowledge with parameter-efficient tuning (PET) block and the selection module to pick out the corresponding one for the testing input, aiming at handling the challenges of catastrophic forgetting and knowledge transfer in CL. However, these methods tend to address only one of the challenges, ignoring the potential of aligning the two modules to effectively address catastrophic forgetting and knowledge transfer simultaneously. To this end, we propose a novel Shared Attention Framework (SAPT), to align the PET learning and selection via the Shared Attentive Learning \& Selection module. Extensive experiments on two CL benchmarks demonstrate the superiority of SAPT. Moreover, SAPT consistently demonstrates its superiority when we scale it to different model sizes (from 770M to 13B), different model architectures (T5 and LLaMA-2) and unseen tasks.},
}