% Conference paper in the proceedings of ICPADS '09 (15th International
% Conference on Parallel and Distributed Systems), pages 854--859.
% The citation key looks like an opaque exporter-generated ID; it is kept
% unchanged so that existing citations of this entry still resolve.
% Author names use the "Last, First" form with no escaped braces, so that
% no literal curly braces are typeset around the given names.
@inproceedings{bf80f13bb62047e28c681e4dd16390c9,
  author    = {Xu, Xiao and Zhang, Wei Zhe and Zhang, Hong Li and Fang, Bin Xing and Liu, Xin Ran},
  title     = {A forwarding-based task scheduling algorithm for distributed web crawling over {DHTs}},
  booktitle = {ICPADS '09 - 15th International Conference on Parallel and Distributed Systems},
  series    = {Proceedings of the International Conference on Parallel and Distributed Systems - ICPADS},
  pages     = {854--859},
  year      = {2009},
  doi       = {10.1109/ICPADS.2009.29},
  isbn      = {9780769539003},
  language  = {english},
  keywords  = {Content addressable network, DHT, Distributed web crawling, Task scheduling},
  note      = {15th International Conference on Parallel and Distributed Systems, ICPADS '09 ; Conference date: 08-12-2009 Through 11-12-2009},
  abstract  = {Distributed Web crawling (DWC) over DHTs is proposed to solve the bottlenecks in the traditional Web crawling. The core of this kind of system is its fully distributed task scheduling mechanism in which the crawlers are treated as peers and the crawlees are treated as resources maintained by the peers. A system model based on the Content Addressable Network (CAN) can further optimize the scheduling mechanism by exploiting the network proximity of the crawlers and the crawlees. In this paper, we propose a new method for CAN in order to achieve load balancing in the CAN-based DWC system. The method not only keeps the load balancing among peers but also keeps the distance between peers and resources very short in our simulations. The shortened peer-resource distance fulfills the need of shortening crawler-crawlee latencies.},
}