% ECAI 2025 conference paper (CRED-SQL). Acronyms in title/booktitle are brace-protected against style recasing; language/address are kept ASCII for classic BibTeX.
@inproceedings{0699c5be1d1e4e8e802e9f3c9d5fe22b,
title = "{CRED-SQL}: Enhancing Real-World Large Scale Database {Text-to-SQL} Parsing Through Cluster Retrieval and Execution Description",
abstract = "Recent advances in large language models (LLMs) have significantly improved the accuracy of Text-to-SQL systems. However, a critical challenge remains: the semantic mismatch between natural language questions (NLQs) and their corresponding SQL queries. This issue is exacerbated in large-scale databases, where semantically similar attributes hinder schema linking and semantic drift during SQL generation, ultimately reducing model accuracy. To address these challenges, we introduce CRED-SQL, a framework designed for large-scale databases that integrates Cluster Retrieval and Execution Description. CRED-SQL first performs cluster-based large-scale schema retrieval to pinpoint the tables and columns most relevant to a given NLQ, alleviating schema mismatch. It then introduces an intermediate natural language representation - Execution Description Language (EDL) - to bridge the gap between NLQs and SQL. This reformulation decomposes the task into two stages: Text-to-EDL and EDL-to-SQL, leveraging LLMs' strong general reasoning capabilities while reducing semantic deviation. Extensive experiments on two large-scale, cross-domain benchmarks - SpiderUnion and BirdUnion - demonstrate that CRED-SQL achieves new state-of-the-art (SOTA) performance, validating its effectiveness and scalability. Our code is available at https://github.com/smduan/CRED-SQL.git.",
author = "Duan, Shaoming and Wang, Zirui and Liu, Chuanyi and Zhu, Zhibin and Zhang, Yuhao and Han, Peiyi and Yan, Liang and Peng, Zewu",
note = "Publisher Copyright: {\textcopyright} 2025 The Authors.; 28th European Conference on Artificial Intelligence, ECAI 2025, including 14th Conference on Prestigious Applications of Intelligent Systems, PAIS 2025 ; Conference date: 25-10-2025 Through 30-10-2025",
year = "2025",
month = oct,
day = "21",
doi = "10.3233/FAIA251337",
language = "English",
series = "Frontiers in Artificial Intelligence and Applications",
publisher = "IOS Press BV",
pages = "4394--4401",
editor = "Lynce, Ines and Murano, Nello and Vallati, Mauro and Villata, Serena and Chesani, Federico and Milano, Michela and Omicini, Andrea and Dastani, Mehdi",
booktitle = "{ECAI} 2025 - 28th European Conference on Artificial Intelligence, including 14th Conference on Prestigious Applications of Intelligent Systems, {PAIS} 2025 - Proceedings",
address = "Netherlands",
}