@comment{
  Conference paper in the DASFAA 2016 International Workshops proceedings.
  The language and address values were exported in Chinese and are translated
  to English here. NOTE(review): address holds a country, not the publisher's
  city -- confirm the city against the publisher record before relying on it.
  The DOI is stored verbatim (no LaTeX escaping); a DOI-aware style is assumed.
}
@inproceedings{59404747e0614cc38a3df2d8675f587a,
  author    = {Gao, Yitong and Zhang, Yan and Wang, Hongzhi and Li, Jianzhong and Gao, Hong},
  title     = {A Distributed Load Balance Algorithm of {MapReduce} for Data Quality Detection},
  booktitle = {Database Systems for Advanced Applications - {DASFAA} 2016 International Workshops},
  editor    = {Kim, Jinho and Gao, Hong and Sakurai, Yasushi},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2016},
  pages     = {294--306},
  isbn      = {9783319320540},
  doi       = {10.1007/978-3-319-32055-7_24},
  language  = {English},
  keywords  = {Data quality detection, Distributed approximation greedy algorithm, Load balance, MapReduce},
  abstract  = {Big data quality detection is a valuable problem in data quality field. MapReduce is an important distributed data processing model mainly for big data processing. Load balance is a key factor that influences the property of MapReduce. In this paper, we propose a distributed greedy approximation algorithm for load balance problem in MapReduce for data quality detection. There are three key challenges: (a) reduce the problem to NP-complete and prove a considerable approximation ratio of the proposed algorithm, (b) just impose one more round of MapReduce than conventional processing and occupy minimal time in the total process, (c) be simple and convenient feasible. Experimental results on real-life and synthetic data demonstrate that the proposed algorithm in this paper is effective for load balance.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2016. International Workshop on Database Systems for Advanced Applications, DASFAA 2016; Conference date: 16-04-2016 through 19-04-2016},
}