@comment{
  Conference paper in the WAIM 2013 proceedings (Lecture Notes in Computer
  Science series). The language and address values are given in English, and
  the DOI is stored bare (no LaTeX escaping) so DOI-aware styles can build a
  working link. NOTE(review): address currently holds only the publisher's
  country; replace it with the publisher's city once confirmed, and add the
  series volume number if known.
}
@inproceedings{1294481764b14c649828cbf1f1bf718b,
  author    = {Xie, Hui and Wang, Hongzhi and Li, Jianzhong and Gao, Hong},
  title     = {A data cleaning framework based on user feedback},
  booktitle = {Web-Age Information Management - 14th International Conference, {WAIM} 2013, Proceedings},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2013},
  pages     = {514--520},
  isbn      = {9783642385612},
  doi       = {10.1007/978-3-642-38562-9_52},
  language  = {English},
  keywords  = {Bayesian decision, Data clean, Data quality rules, User feedback},
  abstract  = {In this paper, we present our design of a data cleaning framework that combines interaction of data quality rules (CFDs, CINDs and MDs) with user feedback through an interactive process. First, to generate candidate repairs for each potentially dirty attribute, we propose an optimization model based on genetic algorithm. We then create a Bayesian machine learning model with several committees to predict the correctness of the repair and rank these repairs by uncertainty score to improve the learned model. User feedback is used to decide whether the model is accurate while inspecting the suggestions. Finally, our experiments on real-world datasets show significant improvement in data quality.},
  note      = {14th International Conference on Web-Age Information Management, WAIM 2013 ; Conference date: 14-06-2013 Through 16-06-2013},
}