% SepJoin conference paper in the ICPADS 2022 proceedings (pp. 633--640).
% All field values are ASCII-only so the entry also works with classic 8-bit BibTeX.
% NOTE(review): address holds the country from the exported record, not the
% publisher's city; replace it with the city once confirmed.
@inproceedings{4ae9ad6704974d10b74c88fdc0d6152d,
title = "{SepJoin}: A Distributed Stream Join System with Low Latency and High Throughput",
abstract = "In the field of real-time analytics, stream joins are the basis for complex queries and greatly affect system performance. In order to satisfy the real-time requirements of streaming applications, the system imposes high requirements on the latency and throughput of the stream join operator. In this paper, we model the latency and throughput of distributed stream join systems based on queuing theory. Based on the analysis of this model, we demonstrate the impact of indexing-related overhead on the latency and throughput of stream join systems and propose a new distributed stream join system, SepJoin, which is oriented to the hash join problem. SepJoin reduces the number of tuples stored in each processing unit belonging to each input stream by designing a novel partitioning scheme that uses as many processing units as possible to store tuples belonging to each input stream, thereby reducing the index-related overhead of each processing unit when performing join operations and ultimately achieving performance benefits in terms of latency and throughput. We provide both theoretical analysis and extensive experimental evaluations to evaluate the processing latency and max throughput of SepJoin.",
keywords = "big data, distributed stream join system, partitioning scheme, queuing theory",
author = "Qihang Wang and Decheng Zuo and Zhan Zhang and Siyuan Chen and Tianming Liu",
note = "Publisher Copyright: {\textcopyright} 2023 IEEE.; 28th IEEE International Conference on Parallel and Distributed Systems, ICPADS 2022 ; Conference date: 10-01-2023 Through 12-01-2023",
year = "2023",
doi = "10.1109/ICPADS56603.2022.00088",
language = "English",
series = "Proceedings of the International Conference on Parallel and Distributed Systems - {ICPADS}",
publisher = "IEEE Computer Society",
pages = "633--640",
booktitle = "Proceedings - 2022 {IEEE} 28th International Conference on Parallel and Distributed Systems, {ICPADS} 2022",
address = "United States",
}