% Conference paper (WAIM 2012, Lecture Notes in Computer Science series).
% Cleaned from an automated export: bare DOI, English language name,
% canonical series title, "Last, First" author form, braced acronym.
% The LNCS volume number is not recorded here; add it once verified.
@inproceedings{9ec8a1537b8747bcbc97ddf94599e509,
  author    = {Zhang, Yan and Yang, Long and Wang, Hongzhi},
  title     = {Range query estimation for dirty data management system},
  booktitle = {Web-Age Information Management - 13th International Conference, {WAIM} 2012, Proceedings},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  year      = {2012},
  pages     = {152--164},
  isbn      = {9783642322808},
  doi       = {10.1007/978-3-642-32281-5_15},
  language  = {English},
  keywords  = {data quality, dirty data, histogram, query estimation},
  abstract  = {In recent years, data quality issues have attracted wide attention. Data quality is mainly caused by dirty data. Currently, many methods for dirty data management have been proposed, and one of them is entity-based relational database in which one tuple represents an entity. The traditional query optimizations having the ability to estimate the cost of execution of a query plan have not been suitable for the new entity-based model. Then new query optimizations need to be developed. In this paper, we propose new query selectivity estimation based on histogram, and focus on solving the overestimation which traditional methods lead to. We prove our approaches are unbiased. The experimental results on both real and synthetic data sets show that our approaches can give good estimates with low error.},
  note      = {13th International Conference on Web-Age Information Management, WAIM 2012 ; Conference date: 18-08-2012 Through 20-08-2012},
}