% Conference paper "LPV" (ICDM 2020). Cleaned repository export: the
% language and address values are given in English, and the escaped
% braces in the third author's given name have been removed.
@inproceedings{4454431533a449b6bda0765ff6856b1c,
  author    = {Xiao, Tong and Quan, Zhe and Wang, Zhi Jie and Zhao, Kaiqi and Liao, Xiangke},
  title     = {{LPV}: A log parser based on vectorization for offline and online log parsing},
  booktitle = {Proceedings - 20th {IEEE} International Conference on Data Mining, {ICDM} 2020},
  editor    = {Plant, Claudia and Wang, Haixun and Cuzzocrea, Alfredo and Zaniolo, Carlo and Wu, Xindong},
  series    = {Proceedings - {IEEE} International Conference on Data Mining, {ICDM}},
  pages     = {1346--1351},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2020},
  month     = nov,
  doi       = {10.1109/ICDM50108.2020.00175},
  language  = {English},
  keywords  = {Clustering, Log parsing, Log template extraction, Vectorization},
  abstract  = {As the first and foremost step of typical automatic log analysis, log parsing has attracted a lot of interest. Most of existing studies treat log messages as pure strings and rely on string matching or string distance. In NLP, word2vec has shown very efficient and effective in representing words with low dimensional vectors. Inspired by this, in this paper we propose a novel method, called LPV (Log Parser based on Vectorization), for both offline and online log parsing. The central idea of our method in offline log parsing is to first convert log messages into vectors, and measure the similarity between two log messages by the distance between two vectors, then log messages can be clustered via clustering the vectors, and log templates can be extracted from the resulting clusters. For online log parsing, we also assign log templates with some kind of average vectors, so that the similarity between an incoming log message and each log template can also be measured by the distance between two vectors. We have conducted extensive experiments based on three widely used log datasets, and the results demonstrate that our proposed method LPV can achieve a competitive performance, compared against state-of-the-art log parsing methods.},
  note      = {Publisher Copyright: {\textcopyright} 2020 IEEE.; 20th IEEE International Conference on Data Mining, ICDM 2020 ; Conference date: 17-11-2020 Through 20-11-2020},
}