% Conference paper published in the WAIM 2013 proceedings (Lecture Notes in Computer Science series).
% NOTE(review): `address` holds the publisher country as exported; replace with the publisher city if known.
% NOTE(review): the LNCS volume number is not recorded in this entry; add `volume` once confirmed.
@inproceedings{1bd2230dc0594ea0b4a8a69aab2e1a0d,
  author    = {Jin, Lian and Wang, Hongzhi and Gao, Hong},
  title     = {Imputation for categorical attributes with probabilistic reasoning},
  booktitle = {Web-Age Information Management - 14th International Conference, {WAIM} 2013, Proceedings},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2013},
  pages     = {87--98},
  doi       = {10.1007/978-3-642-38562-9_9},
  isbn      = {9783642385612},
  language  = {English},
  keywords  = {Bayesian Network, Missing value imputation, Probabilistic reasoning},
  abstract  = {Since incompleteness affects the data usage, missing values in database should be estimated to make data mining and analysis more accurate. In addition to ignoring or setting to default values, many imputation methods have been proposed, but all of them have their limitations. This paper proposes a probabilistic method to estimate missing values. We construct a Bayesian network in a novel way to identify the dependencies in a dataset, then use the Bayesian reasoning process to find the most probable substitution for each missing value. The benefits of this method include (1) irrelevant attributes can be ignored during estimation; (2) network is built with no target attribute, which means all attributes are handled in one model; (3) probability information can be obtained to measure the accuracy of the imputation. Experimental results show that our construction algorithm is effective and the quality of filled values outperforms the mode imputation method and kNN method. We also verify the effectiveness of the probabilities given by our method experimentally.},
  note      = {14th International Conference on Web-Age Information Management, WAIM 2013 ; Conference date: 14-06-2013 Through 16-06-2013},
}