% Conference paper from the ICYCSEE 2016 proceedings (Communications in
% Computer and Information Science series); the key is the exporter's hash.
% Conference name/dates and the copyright line were moved out of the printed
% "note" field into eventtitle / eventdate / copyright.
% NOTE(review): "address" was exported as a country, not a publisher city;
% the copyright line names Springer Singapore -- confirm the correct city.
@inproceedings{f427c695c05a4146877850d77da9f1dc,
  author     = {Qu, Wei and Cheng, Siyao and Wang, Hongzhi},
  title      = {Efficient File Accessing Techniques on {Hadoop} Distributed File Systems},
  booktitle  = {Social Computing - 2nd International Conference of Young Computer Scientists, Engineers and Educators, ICYCSEE 2016, Proceedings},
  editor     = {Che, Wanxiang and Wang, Hongzhi and Peng, Shaoliang and Jing, Weipeng and Sun, Guanglu and Song, Xianhua and Lu, Zeguang and Han, Qilong and Lin, Junyu and Song, Hongtao},
  series     = {Communications in Computer and Information Science},
  publisher  = {Springer Verlag},
  address    = {Germany},
  year       = {2016},
  pages      = {350--361},
  doi        = {10.1007/978-981-10-2053-7_31},
  isbn       = {9789811020520},
  language   = {english},
  keywords   = {HDFS, Hadoop, Index, Small files},
  abstract   = {Hadoop framework emerged at the right moment when traditional tools were powerless in terms of handling big data. Hadoop Distributed File System (HDFS) which serves as a highly fault-tolerance distributed file system in Hadoop, can improve the throughput of data access effectively. It is very suitable for the application of handling large amounts of datasets. However, Hadoop has the disadvantage that the memory usage rate in NameNode is so high when processing large amounts of small files that it has become the limit of the whole system. In this paper, we propose an approach to optimize the performance of HDFS with small files. The basic idea is to merge small files into a large one whose size is suitable for a block. Furthermore, indexes are built to meet the requirements for fast access to all files in HDFS. Preliminary experiment results show that our approach achieves better performance.},
  eventtitle = {2nd International Conference on Young Computer Scientists, Engineers and Educators, {ICYCSEE} 2016},
  eventdate  = {2016-08-20/2016-08-22},
  copyright  = {{\textcopyright} Springer Science+Business Media Singapore 2016},
}